diff --git a/CMakeLists.txt b/CMakeLists.txt
index 722046ec4b7666892a358e74b76ee0cf07aad77e..e8bdef9cdcd999e217e8cbae194abb021f529e9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,7 +22,7 @@ source_group("Source Files\\jamidht\\eth\\libdevcore" FILES ${Source_Files__jami
 source_group("Source Files\\jamidht\\eth\\libdevcrypto" FILES ${Source_Files__jamidht__eth__libdevcrypto})
 source_group("Source Files\\media" FILES ${Source_Files__media})
 source_group("Source Files\\media\\audio" FILES ${Source_Files__media__audio})
-source_group("Source Files\\media\\audio\\echo-cancel" FILES ${Source_Files__media__audio__echo_cancel})
+source_group("Source Files\\media\\audio\\audio-processing" FILES ${Source_Files__media__audio__audio_processing})
 source_group("Source Files\\media\\audio\\sound" FILES ${Source_Files__media__audio__sound})
 source_group("Source Files\\media\\video" FILES ${Source_Files__media__video})
 source_group("Source Files\\plugin" FILES ${Source_Files__plugin})
@@ -55,7 +55,7 @@ list (APPEND ALL_FILES
       ${Source_Files__media}
       ${Source_Files__media__audio}
       ${Source_Files__media__audio__sound}
-      ${Source_Files__media__audio__echo_cancel}
+      ${Source_Files__media__audio__audio_processing}
       ${Source_Files__media__video}
       ${Source_Files__security}
       ${Source_Files__sip}
@@ -286,4 +286,4 @@ if(MSVC)
 
    set ( CMAKE_STATIC_LINKER_FLAGS ${libAdditionalDependencies} )
 
-endif()
\ No newline at end of file
+endif()
diff --git a/meson.build b/meson.build
index db4e6ed98636c184612a58b206ee65241c17ab70..d016b62de7b092450c7d33e6aadbf2549ce66a46 100644
--- a/meson.build
+++ b/meson.build
@@ -29,7 +29,7 @@ deplibavformat = dependency('libavformat', version: '>= 56.40.101')
 deplibswscale = dependency('libswscale', version: '>= 3.1.101')
 deplibswresample = dependency('libswresample', version: '>= 1.2.101')
 deplibavutil = dependency('libavutil', version: '>= 55.75.100')
-depspeexdsp = dependency('speexdsp')
+
 depfmt = dependency('fmt', version: '>= 5.3')
 
 depyamlcpp = dependency('yaml-cpp', version: '>= 0.5.1', required: false)
@@ -113,6 +113,9 @@ conf.set10('HAVE_RINGNS', depopenssl.found())
 depwebrtcap = dependency('webrtc-audio-processing', required: get_option('aec'))
 conf.set10('HAVE_WEBRTC_AP', depwebrtcap.found())
 
+depspeexdsp = dependency('speexdsp')
+conf.set10('HAVE_SPEEXDSP', depspeexdsp.found())
+
 if get_option('video')
     conf.set('ENABLE_VIDEO', true)
     if host_machine.system() == 'linux' and meson.get_compiler('cpp').get_define('__ANDROID__') != '1'
diff --git a/src/jami/videomanager_interface.h b/src/jami/videomanager_interface.h
index 6a444f76166b36f6232894199ff1a1862235dcad..3c001db5a32eabec924b6bd30a5975b8a16e9a6f 100644
--- a/src/jami/videomanager_interface.h
+++ b/src/jami/videomanager_interface.h
@@ -105,6 +105,7 @@ public:
     float calcRMS() const;
     jami::AudioFormat getFormat() const;
     size_t getFrameSize() const;
+    bool has_voice {false};
 
 private:
     void setFormat(const jami::AudioFormat& format);
diff --git a/src/media/audio/CMakeLists.txt b/src/media/audio/CMakeLists.txt
index a6f71f24a679922c1732b238a32c02b336be9112..8da543827ae0a568399775306956592443da9c4c 100644
--- a/src/media/audio/CMakeLists.txt
+++ b/src/media/audio/CMakeLists.txt
@@ -50,5 +50,5 @@ endif()
 add_subdirectory(sound)
 set (Source_Files__media__audio__sound ${Source_Files__media__audio__sound} PARENT_SCOPE)
 
-add_subdirectory(echo-cancel)
-set (Source_Files__media__audio__echo_cancel ${Source_Files__media__audio__echo_cancel} PARENT_SCOPE)
\ No newline at end of file
+add_subdirectory(audio-processing)
+set (Source_Files__media__audio__audio_processing ${Source_Files__media__audio__audio_processing} PARENT_SCOPE)
diff --git a/src/media/audio/Makefile.am b/src/media/audio/Makefile.am
index 2e883492d531de5271dcf02b7ca56070d3b4392b..53b997c7ee9053929ea69dd94fb5e92bac4cf100 100644
--- a/src/media/audio/Makefile.am
+++ b/src/media/audio/Makefile.am
@@ -53,7 +53,7 @@ noinst_HEADERS += $(RING_SPEEXDSP_HEAD) \
 
 
 include ./media/audio/sound/Makefile.am
-include ./media/audio/echo-cancel/Makefile.am
+include ./media/audio/audio-processing/Makefile.am
 
 if BUILD_OPENSL
 include ./media/audio/opensl/Makefile.am
diff --git a/src/media/audio/audio-processing/CMakeLists.txt b/src/media/audio/audio-processing/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f5de9632ce970a6407713c3345add76d0fd8c83f
--- /dev/null
+++ b/src/media/audio/audio-processing/CMakeLists.txt
@@ -0,0 +1,14 @@
+################################################################################
+# Source groups - audio-processing
+################################################################################
+list (APPEND Source_Files__media__audio__audio_processing
+      "${CMAKE_CURRENT_SOURCE_DIR}/audio_processor.h"
+      "${CMAKE_CURRENT_SOURCE_DIR}/null_audio_processor.h"
+      "${CMAKE_CURRENT_SOURCE_DIR}/null_audio_processor.cpp"
+      "${CMAKE_CURRENT_SOURCE_DIR}/speex.h"
+      "${CMAKE_CURRENT_SOURCE_DIR}/speex.cpp"
+      "${CMAKE_CURRENT_SOURCE_DIR}/webrtc.h"
+      "${CMAKE_CURRENT_SOURCE_DIR}/webrtc.cpp"
+)
+
+set (Source_Files__media__audio__audio_processing ${Source_Files__media__audio__audio_processing} PARENT_SCOPE)
diff --git a/src/media/audio/audio-processing/Makefile.am b/src/media/audio/audio-processing/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..1002802a86ca79f641777c65054b65206d81d74d
--- /dev/null
+++ b/src/media/audio/audio-processing/Makefile.am
@@ -0,0 +1,24 @@
+noinst_LTLIBRARIES += libaudioprocessing.la
+
+EC_SRC = ./media/audio/audio-processing/null_audio_processor.cpp
+EC_HDR = ./media/audio/audio-processing/null_audio_processor.h
+
+if BUILD_SPEEXDSP
+EC_SRC += ./media/audio/audio-processing/speex.cpp
+EC_HDR += ./media/audio/audio-processing/speex.h
+endif
+
+if HAVE_WEBRTC_AP
+EC_SRC += ./media/audio/audio-processing/webrtc.cpp
+EC_HDR += ./media/audio/audio-processing/webrtc.h
+libaudioprocessing_la_CXXFLAGS = @WEBRTC_CFLAGS@ $(AM_CXXFLAGS)
+endif
+
+libaudioprocessing_la_SOURCES = \
+	$(EC_SRC)
+
+noinst_HEADERS += \
+	./media/audio/audio-processing/audio_processor.h \
+	$(EC_HDR)
+
+libaudio_la_LIBADD += libaudioprocessing.la
diff --git a/src/media/audio/audio-processing/audio_processor.h b/src/media/audio/audio-processing/audio_processor.h
new file mode 100644
index 0000000000000000000000000000000000000000..e89289bcd3a197f4fa271ec3b3b0c99dc8be0367
--- /dev/null
+++ b/src/media/audio/audio-processing/audio_processor.h
@@ -0,0 +1,180 @@
+/*
+ *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ */
+
+#pragma once
+
+#include "noncopyable.h"
+#include "audio/audio_frame_resizer.h"
+#include "audio/resampler.h"
+#include "audio/audiobuffer.h"
+#include "libav_deps.h"
+
+#include <atomic>
+#include <memory>
+
+namespace jami {
+
+class AudioProcessor
+{
+private:
+    NON_COPYABLE(AudioProcessor);
+
+public:
+    AudioProcessor(AudioFormat format, unsigned frameSize)
+        : playbackQueue_(format, (int) frameSize)
+        , recordQueue_(format, (int) frameSize)
+        , resampler_(new Resampler)
+        , format_(format)
+        , frameSize_(frameSize)
+        , frameDurationMs_((unsigned int) (frameSize_ * (1.0 / format_.sample_rate) * 1000))
+    {}
+    virtual ~AudioProcessor() = default;
+
+    virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf)
+    {
+        recordStarted_ = true;
+        if (!playbackStarted_)
+            return;
+        enqueue(recordQueue_, std::move(buf));
+    };
+    virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf)
+    {
+        playbackStarted_ = true;
+        if (!recordStarted_)
+            return;
+        auto copy = buf;
+        enqueue(playbackQueue_, std::move(copy));
+    };
+
+    /**
+     * @brief Process and return a single AudioFrame
+     */
+    virtual std::shared_ptr<AudioFrame> getProcessed() = 0;
+
+    /**
+     * @brief Set the status of echo cancellation
+     */
+    virtual void enableEchoCancel(bool enabled) = 0;
+
+    /**
+     * @brief Set the status of noise suppression
+     * includes de-reverb, de-noise, high pass filter, etc
+     */
+    virtual void enableNoiseSuppression(bool enabled) = 0;
+
+    /**
+     * @brief Set the status of automatic gain control
+     */
+    virtual void enableAutomaticGainControl(bool enabled) = 0;
+
+protected:
+    AudioFrameResizer playbackQueue_;
+    AudioFrameResizer recordQueue_;
+    std::unique_ptr<Resampler> resampler_;
+    std::atomic_bool playbackStarted_;
+    std::atomic_bool recordStarted_;
+    AudioFormat format_;
+    unsigned int frameSize_;
+    unsigned int frameDurationMs_;
+
+    // artificially extend voice activity by this long
+    unsigned int forceMinimumVoiceActivityMs {1000};
+
+    // current number of frames to force the voice activity to be true
+    unsigned int forceVoiceActiveFramesLeft {0};
+
+    // voice activity must be active for this long _before_ it is considered legitimate
+    unsigned int minimumConsequtiveDurationMs {200};
+
+    // current number of frames that the voice activity has been true
+    unsigned int consecutiveActiveFrames {0};
+
+    /**
+     * @brief Helper method for audio processors, should be called at start of getProcessed()
+     *        Pops frames from audio queues if there's overflow
+     * @returns True if there is underflow, false otherwise. An AudioProcessor should
+     *          return a blank AudioFrame if there is underflow.
+     */
+    bool tidyQueues()
+    {
+        while (recordQueue_.samples() > recordQueue_.frameSize() * 10) {
+            JAMI_DBG("record overflow %d / %d", recordQueue_.samples(), frameSize_);
+            recordQueue_.dequeue();
+        }
+        while (playbackQueue_.samples() > playbackQueue_.frameSize() * 10) {
+            JAMI_DBG("playback overflow %d / %d", playbackQueue_.samples(), frameSize_);
+            playbackQueue_.dequeue();
+        }
+        if (recordQueue_.samples() < recordQueue_.frameSize()
+            || playbackQueue_.samples() < playbackQueue_.frameSize()) {
+            // If there are not enough samples in either queue, we can't
+            // process anything.
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * @brief Stabilizes voice activity
+     * @param voiceStatus the voice status that was detected by the audio processor
+     *                    for the current frame
+     * @returns The voice activity status that should be set on the current frame
+     */
+    bool getStabilizedVoiceActivity(bool voiceStatus)
+    {
+        bool newVoice = false;
+
+        if (voiceStatus) {
+            // we detected activity
+            consecutiveActiveFrames += 1;
+
+            // make sure that we have been active for necessary time
+            if (consecutiveActiveFrames > minimumConsequtiveDurationMs / frameDurationMs_) {
+                newVoice = true;
+
+                // set number of frames that will be forced positive
+                forceVoiceActiveFramesLeft = (int) forceMinimumVoiceActivityMs / frameDurationMs_;
+            }
+        } else if (forceVoiceActiveFramesLeft > 0) {
+            // if we didn't detect voice, but we haven't elapsed the minimum duration,
+            // force voice to be true
+            newVoice = true;
+            forceVoiceActiveFramesLeft -= 1;
+
+            consecutiveActiveFrames += 1;
+        } else {
+            // else no voice and no need to force
+            newVoice = false;
+            consecutiveActiveFrames = 0;
+        }
+
+        return newVoice;
+    }
+
+private:
+    void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
+    {
+        if (buf->getFormat() != format_) {
+            auto resampled = resampler_->resample(std::move(buf), format_);
+            frameResizer.enqueue(std::move(resampled));
+        } else
+            frameResizer.enqueue(std::move(buf));
+    };
+};
+
+} // namespace jami
diff --git a/src/media/audio/audio-processing/null_audio_processor.cpp b/src/media/audio/audio-processing/null_audio_processor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..191584da1ecfb65350137ce65660c2a680fe3fa1
--- /dev/null
+++ b/src/media/audio/audio-processing/null_audio_processor.cpp
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ */
+
+#include "null_audio_processor.h"
+
+#include <cassert>
+
+namespace jami {
+
+NullAudioProcessor::NullAudioProcessor(AudioFormat format, unsigned frameSize)
+    : AudioProcessor(format, frameSize)
+{
+    JAMI_DBG("[null_audio] NullAudioProcessor, frame size = %d (=%d ms), channels = %d",
+             frameSize,
+             frameDurationMs_,
+             format.nb_channels);
+}
+
+std::shared_ptr<AudioFrame>
+NullAudioProcessor::getProcessed()
+{
+    if (tidyQueues()) {
+        return {};
+    }
+
+    playbackQueue_.dequeue();
+    return recordQueue_.dequeue();
+};
+
+} // namespace jami
diff --git a/src/media/audio/echo-cancel/null_echo_canceller.h b/src/media/audio/audio-processing/null_audio_processor.h
similarity index 66%
rename from src/media/audio/echo-cancel/null_echo_canceller.h
rename to src/media/audio/audio-processing/null_audio_processor.h
index 2039d3277d7c179b88e31838b0d4641e7fd9f7b4..18c3288c498a5b67462a64a6742975718e84b8e4 100644
--- a/src/media/audio/echo-cancel/null_echo_canceller.h
+++ b/src/media/audio/audio-processing/null_audio_processor.h
@@ -1,8 +1,6 @@
 /*
  *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
  *
- *  Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 3 of the License, or
@@ -20,20 +18,23 @@
 
 #pragma once
 
-#include "echo_canceller.h"
+#include "audio_processor.h"
 
 namespace jami {
 
-class NullEchoCanceller final : public EchoCanceller
+class NullAudioProcessor final : public AudioProcessor
 {
 public:
-    NullEchoCanceller(AudioFormat format, unsigned frameSize);
-    ~NullEchoCanceller() = default;
+    NullAudioProcessor(AudioFormat format, unsigned frameSize);
+    ~NullAudioProcessor() = default;
 
-    void putRecorded(std::shared_ptr<AudioFrame>&& buf) override;
-    void putPlayback(const std::shared_ptr<AudioFrame>& buf) override;
     std::shared_ptr<AudioFrame> getProcessed() override;
-    void done() override;
+
+    void enableEchoCancel(bool) override {};
+
+    void enableNoiseSuppression(bool) override {};
+
+    void enableAutomaticGainControl(bool) override {};
 };
 
 } // namespace jami
diff --git a/src/media/audio/audio-processing/speex.cpp b/src/media/audio/audio-processing/speex.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8d4ed45ee62cf2f3f399938fabcf562e70733ac6
--- /dev/null
+++ b/src/media/audio/audio-processing/speex.cpp
@@ -0,0 +1,220 @@
+/*
+ *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ */
+
+#include "speex.h"
+
+#include "audio/audiolayer.h"
+#include <cstdint>
+#include <memory>
+#include <speex/speex_config_types.h>
+#include <vector>
+
+extern "C" {
+#include <speex/speex_echo.h>
+#include <speex/speex_preprocess.h>
+}
+
+namespace jami {
+
+SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
+    : AudioProcessor(format, frameSize)
+    , echoState(speex_echo_state_init_mc((int) frameSize,
+                                         (int) frameSize * 16,
+                                         (int) format.nb_channels,
+                                         (int) format.nb_channels),
+                &speex_echo_state_destroy)
+    , iProcBuffer(frameSize_, format)
+{
+    JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d",
+             frameSize,
+             frameDurationMs_,
+             format.nb_channels);
+    // set up speex echo state
+    speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
+
+    // speex specific value to turn feature on (need to pass a pointer to it)
+    spx_int32_t speexOn = 1;
+
+    // probability integers, i.e. 50 means 50%
+    // vad will be true if speex's raw probability calculation is higher than this in any case
+    spx_int32_t probStart = 99;
+
+    // vad will be true if voice was active last frame
+    //     AND speex's raw probability calculation is higher than this
+    spx_int32_t probContinue = 90;
+
+    // maximum noise suppression in dB (negative)
+    spx_int32_t maxNoiseSuppress = -50;
+
+    // set up speex preprocess states, one for each channel
+    // note that they are not enabled here, but rather in the enable* functions
+    for (unsigned int i = 0; i < format.nb_channels; i++) {
+        auto channelPreprocessorState
+            = SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize,
+                                                                  (int) format.sample_rate),
+                                      &speex_preprocess_state_destroy);
+
+        // set max noise suppression level
+        speex_preprocess_ctl(channelPreprocessorState.get(),
+                             SPEEX_PREPROCESS_SET_NOISE_SUPPRESS,
+                             &maxNoiseSuppress);
+
+        // set up voice activity values
+        speex_preprocess_ctl(channelPreprocessorState.get(), SPEEX_PREPROCESS_SET_VAD, &speexOn);
+        speex_preprocess_ctl(channelPreprocessorState.get(),
+                             SPEEX_PREPROCESS_SET_PROB_START,
+                             &probStart);
+        speex_preprocess_ctl(channelPreprocessorState.get(),
+                             SPEEX_PREPROCESS_SET_PROB_CONTINUE,
+                             &probContinue);
+
+        // keep track of this channel's preprocessor state
+        preprocessorStates.push_back(std::move(channelPreprocessorState));
+    }
+
+    JAMI_INFO("[speex-dsp] Done initializing");
+}
+
+void
+SpeexAudioProcessor::enableEchoCancel(bool enabled)
+{
+    JAMI_DBG("[speex-dsp] enableEchoCancel %d", enabled);
+    // need to set member variable so we know to do it in getProcessed
+    shouldAEC = enabled;
+
+    if (enabled) {
+        // reset the echo canceller
+        speex_echo_state_reset(echoState.get());
+
+        for (auto& channelPreprocessorState : preprocessorStates) {
+            // attach our already-created echo canceller
+            speex_preprocess_ctl(channelPreprocessorState.get(),
+                                 SPEEX_PREPROCESS_SET_ECHO_STATE,
+                                 echoState.get());
+        }
+    } else {
+        for (auto& channelPreprocessorState : preprocessorStates) {
+            // detach echo canceller (set it to NULL)
+            // don't destroy it though, we will reset it when necessary
+            speex_preprocess_ctl(channelPreprocessorState.get(),
+                                 SPEEX_PREPROCESS_SET_ECHO_STATE,
+                                 NULL);
+        }
+    }
+}
+
+void
+SpeexAudioProcessor::enableNoiseSuppression(bool enabled)
+{
+    JAMI_DBG("[speex-dsp] enableNoiseSuppression %d", enabled);
+    spx_int32_t speexSetValue = (spx_int32_t) enabled;
+
+    // for each preprocessor
+    for (auto& channelPreprocessorState : preprocessorStates) {
+        // set denoise status
+        speex_preprocess_ctl(channelPreprocessorState.get(),
+                             SPEEX_PREPROCESS_SET_DENOISE,
+                             &speexSetValue);
+        // set de-reverb status
+        speex_preprocess_ctl(channelPreprocessorState.get(),
+                             SPEEX_PREPROCESS_SET_DEREVERB,
+                             &speexSetValue);
+    }
+}
+
+void
+SpeexAudioProcessor::enableAutomaticGainControl(bool enabled)
+{
+    JAMI_DBG("[speex-dsp] enableAutomaticGainControl %d", enabled);
+    spx_int32_t speexSetValue = (spx_int32_t) enabled;
+
+    // for each preprocessor
+    for (auto& channelPreprocessorState : preprocessorStates) {
+        // set AGC status
+        speex_preprocess_ctl(channelPreprocessorState.get(),
+                             SPEEX_PREPROCESS_SET_AGC,
+                             &speexSetValue);
+    }
+}
+
+std::shared_ptr<AudioFrame>
+SpeexAudioProcessor::getProcessed()
+{
+    if (tidyQueues()) {
+        return {};
+    }
+
+    auto playback = playbackQueue_.dequeue();
+    auto record = recordQueue_.dequeue();
+
+    if (!playback || !record) {
+        return {};
+    }
+
+    auto processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
+
+    if (shouldAEC) {
+        // we want to echo cancel
+        // multichannel, output into processed
+        speex_echo_cancellation(echoState.get(),
+                                (int16_t*) record->pointer()->data[0],
+                                (int16_t*) playback->pointer()->data[0],
+                                (int16_t*) processed->pointer()->data[0]);
+    } else {
+        // don't want to echo cancel, so just use record frame instead
+        processed = record;
+    }
+
+    // deinterleave processed into channels
+    std::vector<int16_t*> procData {format_.nb_channels};
+    iProcBuffer.deinterleave((const AudioSample*) processed->pointer()->data[0],
+                             frameSize_,
+                             format_.nb_channels);
+
+    // point procData to correct channels
+    for (unsigned int channel = 0; channel < format_.nb_channels; channel++) {
+        procData[channel] = iProcBuffer.getChannel(channel)->data();
+    }
+
+    // overall voice activity
+    bool overallVad = false;
+    // current channel voice activity
+    int channelVad;
+
+    // run preprocess on each channel
+    int channel = 0;
+    for (auto& channelPreprocessorState : preprocessorStates) {
+        // preprocesses in place, returns voice activity boolean
+        channelVad = speex_preprocess_run(channelPreprocessorState.get(), procData[channel]);
+
+        // boolean OR
+        overallVad |= channelVad;
+
+        channel += 1;
+    }
+
+    // reinterleave into processed
+    iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);
+
+    // add stabilized voice activity to the AudioFrame
+    processed->has_voice = getStabilizedVoiceActivity(overallVad);
+
+    return processed;
+}
+
+} // namespace jami
diff --git a/src/media/audio/echo-cancel/speex_echo_canceller.h b/src/media/audio/audio-processing/speex.h
similarity index 51%
rename from src/media/audio/echo-cancel/speex_echo_canceller.h
rename to src/media/audio/audio-processing/speex.h
index 653c0a6d67fa618d34ce27cf2410a517c62e32d5..b68714047a76f338db8a61ea7e4282745f09b42e 100644
--- a/src/media/audio/echo-cancel/speex_echo_canceller.h
+++ b/src/media/audio/audio-processing/speex.h
@@ -20,32 +20,44 @@
 
 #pragma once
 
-#include "audio/echo-cancel/echo_canceller.h"
-#include "audio/audio_frame_resizer.h"
+#include "audio_processor.h"
 
+// typedef speex C structs
 extern "C" {
 struct SpeexEchoState_;
 typedef struct SpeexEchoState_ SpeexEchoState;
+struct SpeexPreprocessState_;
+typedef struct SpeexPreprocessState_ SpeexPreprocessState;
 }
 
-#include <memory>
-
 namespace jami {
 
-class SpeexEchoCanceller final : public EchoCanceller
+class SpeexAudioProcessor final : public AudioProcessor
 {
 public:
-    SpeexEchoCanceller(AudioFormat format, unsigned frameSize);
-    ~SpeexEchoCanceller() = default;
+    SpeexAudioProcessor(AudioFormat format, unsigned frameSize);
+    ~SpeexAudioProcessor() = default;
 
-    // Inherited via EchoCanceller
-    void putRecorded(std::shared_ptr<AudioFrame>&& buf) override;
-    void putPlayback(const std::shared_ptr<AudioFrame>& buf) override;
     std::shared_ptr<AudioFrame> getProcessed() override;
-    void done() override;
+
+    void enableEchoCancel(bool enabled) override;
+    void enableNoiseSuppression(bool enabled) override;
+    void enableAutomaticGainControl(bool enabled) override;
 
 private:
-    struct SpeexEchoStateImpl;
-    std::unique_ptr<SpeexEchoStateImpl> pimpl_;
+    using SpeexEchoStatePtr = std::unique_ptr<SpeexEchoState, void (*)(SpeexEchoState*)>;
+    using SpeexPreprocessStatePtr
+        = std::unique_ptr<SpeexPreprocessState, void (*)(SpeexPreprocessState*)>;
+
+    // multichannel, one for the entire audio processor
+    SpeexEchoStatePtr echoState;
+
+    // one for each channel
+    std::vector<SpeexPreprocessStatePtr> preprocessorStates;
+
+    AudioBuffer iProcBuffer;
+
+    // if we should do echo cancellation
+    bool shouldAEC {false};
 };
 } // namespace jami
diff --git a/src/media/audio/audio-processing/webrtc.cpp b/src/media/audio/audio-processing/webrtc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..519c6e6cc08f156938e143cdf0a4b0bbf548b0af
--- /dev/null
+++ b/src/media/audio/audio-processing/webrtc.cpp
@@ -0,0 +1,201 @@
+/*
+ *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
+ */
+
+#include "webrtc.h"
+
+#include <webrtc/modules/audio_processing/include/audio_processing.h>
+
+namespace jami {
+
+constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError;
+
+WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSize)
+    : AudioProcessor(format, frameSize)
+    , fRecordBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
+    , fPlaybackBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
+    , iRecordBuffer_(frameSize_, format)
+    , iPlaybackBuffer_(frameSize_, format)
+{
+    JAMI_DBG("[webrtc-ap] WebRTCAudioProcessor, frame size = %d (=%d ms), channels = %d",
+             frameSize,
+             frameDurationMs_,
+             format.nb_channels);
+    webrtc::Config config;
+    config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
+    config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
+
+    apm.reset(webrtc::AudioProcessing::Create(config));
+
+    webrtc::StreamConfig streamConfig((int) format.sample_rate, (int) format.nb_channels);
+    webrtc::ProcessingConfig pconfig = {
+        streamConfig, /* input stream */
+        streamConfig, /* output stream */
+        streamConfig, /* reverse input stream */
+        streamConfig, /* reverse output stream */
+    };
+
+    if (apm->Initialize(pconfig) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
+    }
+
+    // voice activity
+    if (apm->voice_detection()->Enable(true) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error enabling voice detection");
+    }
+    // TODO: change likelihood?
+    if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
+        != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
+    }
+    // asserted to be 10 in voice_detection_impl.cc
+    if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
+    }
+
+    JAMI_INFO("[webrtc-ap] Done initializing");
+}
+
+void
+WebRTCAudioProcessor::enableNoiseSuppression(bool enabled)
+{
+    JAMI_DBG("[webrtc-ap] enableNoiseSuppression %d", enabled);
+    if (apm->noise_suppression()->Enable(enabled) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error enabling noise suppression");
+    }
+    if (apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kVeryHigh) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error setting noise suppression level");
+    }
+    if (apm->high_pass_filter()->Enable(enabled) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error enabling high pass filter");
+    }
+}
+
+void
+WebRTCAudioProcessor::enableAutomaticGainControl(bool enabled)
+{
+    JAMI_DBG("[webrtc-ap] enableAutomaticGainControl %d", enabled);
+    if (apm->gain_control()->Enable(enabled) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error enabling automatic gain control");
+    }
+    if (apm->gain_control()->set_analog_level_limits(0, 255) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error setting automatic gain control analog level limits");
+    }
+    if (apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error setting automatic gain control mode");
+    }
+}
+
+void
+WebRTCAudioProcessor::enableEchoCancel(bool enabled)
+{
+    JAMI_DBG("[webrtc-ap] enableEchoCancel %d", enabled);
+
+    if (apm->echo_cancellation()->Enable(enabled) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error enabling echo cancellation");
+    }
+    if (apm->echo_cancellation()->set_suppression_level(
+            webrtc::EchoCancellation::SuppressionLevel::kHighSuppression)
+        != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error setting echo cancellation level");
+    }
+    if (apm->echo_cancellation()->enable_drift_compensation(true) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] Error enabling echo cancellation drift compensation");
+    }
+}
+
+std::shared_ptr<AudioFrame>
+WebRTCAudioProcessor::getProcessed()
+{
+    if (tidyQueues()) {
+        return {};
+    }
+
+    int driftSamples = playbackQueue_.samples() - recordQueue_.samples();
+
+    auto playback = playbackQueue_.dequeue();
+    auto record = recordQueue_.dequeue();
+    if (!playback || !record) {
+        return {};
+    }
+
+    auto processed = std::make_shared<AudioFrame>(format_, frameSize_);
+
+    // webrtc::StreamConfig& sc = streamConfig;
+    webrtc::StreamConfig sc((int) format_.sample_rate, (int) format_.nb_channels);
+
+    // analyze deinterleaved float playback data
+    iPlaybackBuffer_.deinterleave((const AudioSample*) playback->pointer()->data[0],
+                                  frameSize_,
+                                  format_.nb_channels);
+    std::vector<float*> playData {format_.nb_channels};
+    for (unsigned channel = 0; channel < format_.nb_channels; ++channel) {
+        // point playData channel to appropriate data location
+        playData[channel] = fPlaybackBuffer_[channel].data();
+
+        // write playback to playData channel
+        iPlaybackBuffer_.channelToFloat(playData[channel], (int) channel);
+    }
+
+    // process reverse in place
+    if (apm->ProcessReverseStream(playData.data(), sc, sc, playData.data()) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] ProcessReverseStream failed");
+    }
+
+    // process deinterleaved float recorded data
+    iRecordBuffer_.deinterleave((const AudioSample*) record->pointer()->data[0],
+                                frameSize_,
+                                format_.nb_channels);
+    std::vector<float*> recData {format_.nb_channels};
+    for (unsigned int channel = 0; channel < format_.nb_channels; ++channel) {
+        // point recData channel to appropriate data location
+        recData[channel] = fRecordBuffer_[channel].data();
+
+        // write data to recData channel
+        iRecordBuffer_.channelToFloat(recData[channel], (int) channel);
+    }
+    // TODO: maybe implement this to see if it's better than automatic drift compensation
+    // (it MUST be called prior to ProcessStream)
+    // delay = (t_render - t_analyze) + (t_process - t_capture)
+    if (apm->set_stream_delay_ms(0) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] set_stream_delay_ms failed");
+    }
+
+    if (apm->gain_control()->set_stream_analog_level(analogLevel_) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] set_stream_analog_level failed");
+    }
+    apm->echo_cancellation()->set_stream_drift_samples(driftSamples);
+
+    // process in place
+    if (apm->ProcessStream(recData.data(), sc, sc, recData.data()) != webrtcNoError) {
+        JAMI_ERR("[webrtc-ap] ProcessStream failed");
+    }
+
+    analogLevel_ = apm->gain_control()->stream_analog_level();
+
+    // return interleaved s16 data
+    iRecordBuffer_.convertFloatPlanarToSigned16((uint8_t**) recData.data(),
+                                                frameSize_,
+                                                format_.nb_channels);
+    iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
+
+    processed->has_voice = getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
+
+    return processed;
+}
+
+} // namespace jami
diff --git a/src/media/audio/echo-cancel/webrtc_echo_canceller.h b/src/media/audio/audio-processing/webrtc.h
similarity index 64%
rename from src/media/audio/echo-cancel/webrtc_echo_canceller.h
rename to src/media/audio/audio-processing/webrtc.h
index 9984827943b401b46518efac4f1e4e1a01b3d20f..5d0295c142a349bc81a4a0bf8ee41a7b5b07e9cb 100644
--- a/src/media/audio/echo-cancel/webrtc_echo_canceller.h
+++ b/src/media/audio/audio-processing/webrtc.h
@@ -1,8 +1,6 @@
 /*
  *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
  *
- *  Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 3 of the License, or
@@ -20,28 +18,29 @@
 
 #pragma once
 
-#include "audio/echo-cancel/echo_canceller.h"
-#include "audio/audio_frame_resizer.h"
+#include "audio_processor.h"
 
-#include <memory>
+namespace webrtc {
+class AudioProcessing;
+}
 
 namespace jami {
 
-class WebRTCEchoCanceller final : public EchoCanceller
+class WebRTCAudioProcessor final : public AudioProcessor
 {
 public:
-    WebRTCEchoCanceller(AudioFormat format, unsigned frameSize);
-    ~WebRTCEchoCanceller() = default;
+    WebRTCAudioProcessor(AudioFormat format, unsigned frameSize);
+    ~WebRTCAudioProcessor() = default;
 
-    // Inherited via EchoCanceller
-    void putRecorded(std::shared_ptr<AudioFrame>&& buf) override;
-    void putPlayback(const std::shared_ptr<AudioFrame>& buf) override;
+    // Inherited via AudioProcessor
     std::shared_ptr<AudioFrame> getProcessed() override;
-    void done() override;
+
+    void enableEchoCancel(bool enabled) override;
+    void enableNoiseSuppression(bool enabled) override;
+    void enableAutomaticGainControl(bool enabled) override;
 
 private:
-    struct WebRTCAPMImpl;
-    std::unique_ptr<WebRTCAPMImpl> pimpl_;
+    std::unique_ptr<webrtc::AudioProcessing> apm;
 
     using fChannelBuffer = std::vector<std::vector<float>>;
     fChannelBuffer fRecordBuffer_;
diff --git a/src/media/audio/audio_frame_resizer.cpp b/src/media/audio/audio_frame_resizer.cpp
index d7d79d63bce49c0d256639425f9c198416ad3e49..a5fe18b1b40f7ed3ae132c239c9d7f232ab27a73 100644
--- a/src/media/audio/audio_frame_resizer.cpp
+++ b/src/media/audio/audio_frame_resizer.cpp
@@ -109,6 +109,9 @@ AudioFrameResizer::enqueue(std::shared_ptr<AudioFrame>&& frame)
         return; // return if frame was just passed through
     }
 
+    // voice activity
+    hasVoice_ = frame->has_voice;
+
     // queue reallocates itself if need be
     if ((ret = av_audio_fifo_write(queue_, reinterpret_cast<void**>(f->data), f->nb_samples)) < 0) {
         JAMI_ERR() << "Audio resizer error: " << libav_utils::getError(ret);
@@ -139,6 +142,7 @@ AudioFrameResizer::dequeue()
         return {};
     }
     frame->pointer()->pts = nextOutputPts_;
+    frame->has_voice = hasVoice_;
     nextOutputPts_ += frameSize_;
     return frame;
 }
diff --git a/src/media/audio/audio_frame_resizer.h b/src/media/audio/audio_frame_resizer.h
index 246d076688c94fa3aa2be4e88137b1a168733654..cdc41beae211cbe39a36291f900ecc1995327001 100644
--- a/src/media/audio/audio_frame_resizer.h
+++ b/src/media/audio/audio_frame_resizer.h
@@ -104,6 +104,7 @@ private:
      */
     AVAudioFifo* queue_;
     int64_t nextOutputPts_ {0};
+    bool hasVoice_ {false};
 };
 
 } // namespace jami
diff --git a/src/media/audio/audio_input.cpp b/src/media/audio/audio_input.cpp
index 98fc993d027d8aef01ac471fa8dfbed0e239c913..03027026dac2a87b306da3f3e29eec7924897a48 100644
--- a/src/media/audio/audio_input.cpp
+++ b/src/media/audio/audio_input.cpp
@@ -122,18 +122,20 @@ AudioInput::readFromDevice()
     std::this_thread::sleep_until(wakeUp_);
     wakeUp_ += MS_PER_PACKET;
 
-    auto& mainBuffer = Manager::instance().getRingBufferPool();
-    auto samples = mainBuffer.getData(id_);
-    if (not samples)
+    auto& bufferPool = Manager::instance().getRingBufferPool();
+    auto audioFrame = bufferPool.getData(id_);
+    if (not audioFrame)
         return;
 
-    if (muteState_)
-        libav_utils::fillWithSilence(samples->pointer());
+    if (muteState_) {
+        libav_utils::fillWithSilence(audioFrame->pointer());
+        audioFrame->has_voice = false; // force no voice activity when muted
+    }
 
     std::lock_guard<std::mutex> lk(fmtMutex_);
-    if (mainBuffer.getInternalAudioFormat() != format_)
-        samples = resampler_->resample(std::move(samples), format_);
-    resizer_->enqueue(std::move(samples));
+    if (bufferPool.getInternalAudioFormat() != format_)
+        audioFrame = resampler_->resample(std::move(audioFrame), format_);
+    resizer_->enqueue(std::move(audioFrame));
 }
 
 void
diff --git a/src/media/audio/audio_input.h b/src/media/audio/audio_input.h
index 88d46d9a3a9587e3c439ca044757fefd647e919d..23b87a956ed270c1a08bb6aa7af5627515cb07d3 100644
--- a/src/media/audio/audio_input.h
+++ b/src/media/audio/audio_input.h
@@ -67,7 +67,9 @@ public:
     void setSeekTime(int64_t time);
 
     void setSuccessfulSetupCb(const std::function<void(MediaType, bool)>& cb)
-        { onSuccessfulSetup_ = cb; }
+    {
+        onSuccessfulSetup_ = cb;
+    }
 
 private:
     void readFromDevice();
diff --git a/src/media/audio/audiobuffer.h b/src/media/audio/audiobuffer.h
index 7e5634c547806eae43a2d4385fbd9ea2ca558f90..ba60dd38434029f8ca9b7885e16b638e8b220ad0 100644
--- a/src/media/audio/audiobuffer.h
+++ b/src/media/audio/audiobuffer.h
@@ -51,13 +51,7 @@ struct AudioFormat
     unsigned nb_channels;
     AVSampleFormat sampleFormat;
 
-    constexpr AudioFormat(unsigned sr, unsigned c)
-        : sample_rate(sr)
-        , nb_channels(c)
-        , sampleFormat(AV_SAMPLE_FMT_S16)
-    {}
-
-    constexpr AudioFormat(unsigned sr, unsigned c, AVSampleFormat f)
+    constexpr AudioFormat(unsigned sr, unsigned c, AVSampleFormat f = AV_SAMPLE_FMT_S16)
         : sample_rate(sr)
         , nb_channels(c)
         , sampleFormat(f)
diff --git a/src/media/audio/audiolayer.cpp b/src/media/audio/audiolayer.cpp
index 27b1ef0b54e747206497e4ca924f349a276485f4..b640999c88c8a113480d1816bdff453686effdc2 100644
--- a/src/media/audio/audiolayer.cpp
+++ b/src/media/audio/audiolayer.cpp
@@ -28,11 +28,13 @@
 #include "tonecontrol.h"
 #include "client/ring_signal.h"
 
-// aec
+// TODO: decide which library to use/how to decide (compile time? runtime?)
 #if HAVE_WEBRTC_AP
-#include "echo-cancel/webrtc_echo_canceller.h"
+#include "audio-processing/webrtc.h"
+#elif HAVE_SPEEXDSP
+#include "audio-processing/speex.h"
 #else
-#include "echo-cancel/null_echo_canceller.h"
+#include "audio-processing/null_audio_processor.h"
 #endif
 
 #include <ctime>
@@ -102,55 +104,89 @@ void
 AudioLayer::playbackChanged(bool started)
 {
     playbackStarted_ = started;
-    checkAEC();
 }
 
 void
 AudioLayer::recordChanged(bool started)
 {
+    std::lock_guard<std::mutex> lock(audioProcessorMutex);
+    if (started) {
+        // create audio processor
+        createAudioProcessor();
+    } else {
+        // destroy audio processor
+        destroyAudioProcessor();
+    }
     recordStarted_ = started;
-    checkAEC();
 }
 
 void
 AudioLayer::setHasNativeAEC(bool hasEAC)
 {
+    std::lock_guard<std::mutex> lock(audioProcessorMutex);
     hasNativeAEC_ = hasEAC;
-    checkAEC();
+    // if we have a current audio processor, tell it to enable/disable its own AEC
+    if (audioProcessor) {
+        audioProcessor->enableEchoCancel(!hasEAC);
+    }
 }
 
+// must be called with audioProcessorMutex held
 void
-AudioLayer::checkAEC()
+AudioLayer::createAudioProcessor()
 {
-    std::lock_guard<std::mutex> lk(ecMutex_);
-    bool shouldSoftAEC = not hasNativeAEC_ and playbackStarted_ and recordStarted_;
-    if (not echoCanceller_ and shouldSoftAEC) {
-        auto nb_channels = std::min(audioFormat_.nb_channels, audioInputFormat_.nb_channels);
-        auto sample_rate = std::min(audioFormat_.sample_rate, audioInputFormat_.sample_rate);
-        if (sample_rate % 16000u != 0)
-            sample_rate = 16000u * ((sample_rate / 16000u) + 1u);
-        sample_rate = std::clamp(sample_rate, 16000u, 96000u);
-        AudioFormat format {sample_rate, nb_channels};
-        auto frame_size = sample_rate / 100u;
-        JAMI_WARN("Input {%d Hz, %d channels}",
-                  audioInputFormat_.sample_rate,
-                  audioInputFormat_.nb_channels);
-        JAMI_WARN("Output {%d Hz, %d channels}", audioFormat_.sample_rate, audioFormat_.nb_channels);
-        JAMI_WARN("Starting AEC {%d Hz, %d channels, %d samples/frame}",
-                  sample_rate,
-                  nb_channels,
-                  frame_size);
+    auto nb_channels = std::min(audioFormat_.nb_channels, audioInputFormat_.nb_channels);
+    auto sample_rate = std::min(audioFormat_.sample_rate, audioInputFormat_.sample_rate);
+
+    // TODO: explain/rework this math??
+    if (sample_rate % 16000u != 0)
+        sample_rate = 16000u * ((sample_rate / 16000u) + 1u);
+    sample_rate = std::clamp(sample_rate, 16000u, 96000u);
+
+    AudioFormat formatForProcessor {sample_rate, nb_channels};
+
+#if HAVE_SPEEXDSP && !HAVE_WEBRTC_AP
+    // we are using speex
+    // 20 ms frame (sample_rate / 50), the frame duration speex expects
+    auto frame_size = sample_rate / 50u;
+#else
+    // we are using either webrtc-ap or null
+    auto frame_size = sample_rate / 100u;
+#endif
+    JAMI_WARN("Input {%d Hz, %d channels}",
+              audioInputFormat_.sample_rate,
+              audioInputFormat_.nb_channels);
+    JAMI_WARN("Output {%d Hz, %d channels}", audioFormat_.sample_rate, audioFormat_.nb_channels);
+    JAMI_WARN("Starting audio processor with: {%d Hz, %d channels, %d samples/frame}",
+              sample_rate,
+              nb_channels,
+              frame_size);
 
 #if HAVE_WEBRTC_AP
-        echoCanceller_.reset(new WebRTCEchoCanceller(format, frame_size));
+    JAMI_INFO("[audiolayer] using webrtc audio processor");
+    audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
+#elif HAVE_SPEEXDSP
+    JAMI_INFO("[audiolayer] using speex audio processor");
+    audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
 #else
-        echoCanceller_.reset(new NullEchoCanceller(format, frame_size));
+    JAMI_INFO("[audiolayer] using null audio processor");
+    audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
 #endif
-    } else if (echoCanceller_ and not shouldSoftAEC and not playbackStarted_
-               and not recordStarted_) {
-        JAMI_WARN("Stopping AEC");
-        echoCanceller_.reset();
-    }
+
+    audioProcessor->enableNoiseSuppression(true);
+    // TODO: enable AGC?
+    audioProcessor->enableAutomaticGainControl(false);
+
+    // can also be updated after creation via setHasNativeAEC
+    audioProcessor->enableEchoCancel(!hasNativeAEC_);
+}
+
+// must be called with audioProcessorMutex held
+void
+AudioLayer::destroyAudioProcessor()
+{
+    // release the processor; recreated on the next recordChanged(true)
+    audioProcessor.reset();
+}
 
 void
@@ -228,19 +264,19 @@ AudioLayer::getToPlay(AudioFormat format, size_t writableSamples)
         } else if (auto buf = bufferPool.getData(RingBufferPool::DEFAULT_ID)) {
             resampled = resampler_->resample(std::move(buf), format);
         } else {
-            if (echoCanceller_) {
+            std::lock_guard<std::mutex> lock(audioProcessorMutex);
+            if (audioProcessor) {
                 auto silence = std::make_shared<AudioFrame>(format, writableSamples);
                 libav_utils::fillWithSilence(silence->pointer());
-                std::lock_guard<std::mutex> lk(ecMutex_);
-                echoCanceller_->putPlayback(silence);
+                audioProcessor->putPlayback(silence);
             }
             break;
         }
 
         if (resampled) {
-            if (echoCanceller_) {
-                std::lock_guard<std::mutex> lk(ecMutex_);
-                echoCanceller_->putPlayback(resampled);
+            std::lock_guard<std::mutex> lock(audioProcessorMutex);
+            if (audioProcessor) {
+                audioProcessor->putPlayback(resampled);
             }
             playbackQueue_->enqueue(std::move(resampled));
         } else
@@ -253,12 +289,13 @@ AudioLayer::getToPlay(AudioFormat format, size_t writableSamples)
 void
 AudioLayer::putRecorded(std::shared_ptr<AudioFrame>&& frame)
 {
-    if (echoCanceller_) {
-        std::lock_guard<std::mutex> lk(ecMutex_);
-        echoCanceller_->putRecorded(std::move(frame));
-        while (auto rec = echoCanceller_->getProcessed()) {
+    std::lock_guard<std::mutex> lock(audioProcessorMutex);
+    if (audioProcessor && playbackStarted_ && recordStarted_) {
+        audioProcessor->putRecorded(std::move(frame));
+        while (auto rec = audioProcessor->getProcessed()) {
             mainRingBuffer_->put(std::move(rec));
         }
+
     } else {
         mainRingBuffer_->put(std::move(frame));
     }
diff --git a/src/media/audio/audiolayer.h b/src/media/audio/audiolayer.h
index 2e30742be63f62badf09a0c0e8d9c11cd0f5b9e5..b1d252fdd3acdab76c8234e3da8a88827d5e29cc 100644
--- a/src/media/audio/audiolayer.h
+++ b/src/media/audio/audiolayer.h
@@ -26,7 +26,7 @@
 #include "dcblocker.h"
 #include "noncopyable.h"
 #include "audio_frame_resizer.h"
-#include "echo-cancel/echo_canceller.h"
+#include "audio-processing/audio_processor.h"
 
 #include <chrono>
 #include <mutex>
@@ -295,11 +295,12 @@ protected:
      */
     std::unique_ptr<Resampler> resampler_;
 
-    std::mutex ecMutex_ {};
-    std::unique_ptr<EchoCanceller> echoCanceller_;
-
 private:
-    void checkAEC();
+    std::mutex audioProcessorMutex {};
+    std::unique_ptr<AudioProcessor> audioProcessor;
+
+    void createAudioProcessor();
+    void destroyAudioProcessor();
 
     // Set to "true" to play the incoming call notification (beep)
     // when the playback is on (typically when there is already an
diff --git a/src/media/audio/echo-cancel/CMakeLists.txt b/src/media/audio/echo-cancel/CMakeLists.txt
deleted file mode 100644
index 93e8440fbaa4ed620880cdca7b4098f86360bd35..0000000000000000000000000000000000000000
--- a/src/media/audio/echo-cancel/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-################################################################################
-# Source groups - echo-cancel
-################################################################################
-list (APPEND Source_Files__media__audio__echo_cancel
-      "${CMAKE_CURRENT_SOURCE_DIR}/echo_canceller.h"
-      "${CMAKE_CURRENT_SOURCE_DIR}/null_echo_canceller.h"
-      "${CMAKE_CURRENT_SOURCE_DIR}/null_echo_canceller.cpp"
-      "${CMAKE_CURRENT_SOURCE_DIR}/speex_echo_canceller.h"
-      "${CMAKE_CURRENT_SOURCE_DIR}/speex_echo_canceller.cpp"
-      "${CMAKE_CURRENT_SOURCE_DIR}/webrtc_echo_canceller.h"
-      "${CMAKE_CURRENT_SOURCE_DIR}/webrtc_echo_canceller.cpp"
-)
-
-set (Source_Files__media__audio__echo_cancel ${Source_Files__media__audio__echo_cancel} PARENT_SCOPE)
\ No newline at end of file
diff --git a/src/media/audio/echo-cancel/Makefile.am b/src/media/audio/echo-cancel/Makefile.am
deleted file mode 100644
index 197d4c769ac15af7e74848da6a177247a9d79e65..0000000000000000000000000000000000000000
--- a/src/media/audio/echo-cancel/Makefile.am
+++ /dev/null
@@ -1,24 +0,0 @@
-noinst_LTLIBRARIES += libecho-cancel.la
-
-EC_SRC = ./media/audio/echo-cancel/null_echo_canceller.cpp
-EC_HDR = ./media/audio/echo-cancel/null_echo_canceller.h
-
-if BUILD_SPEEXDSP
-EC_SRC += ./media/audio/echo-cancel/speex_echo_canceller.cpp
-EC_HDR += ./media/audio/echo-cancel/speex_echo_canceller.h
-endif
-
-if HAVE_WEBRTC_AP
-EC_SRC += ./media/audio/echo-cancel/webrtc_echo_canceller.cpp
-EC_HDR += ./media/audio/echo-cancel/webrtc_echo_canceller.h
-libecho_cancel_la_CXXFLAGS = @WEBRTC_CFLAGS@ $(AM_CXXFLAGS)
-endif
-
-libecho_cancel_la_SOURCES = \
-	$(EC_SRC)
-
-noinst_HEADERS += \
-	./media/audio/echo-cancel/echo_canceller.h \
-	$(EC_HDR)
-
-libaudio_la_LIBADD += libecho-cancel.la
diff --git a/src/media/audio/echo-cancel/echo_canceller.h b/src/media/audio/echo-cancel/echo_canceller.h
deleted file mode 100644
index 75fbb9bdb6c91aec961cd52584ec25a7ed0c7f3a..0000000000000000000000000000000000000000
--- a/src/media/audio/echo-cancel/echo_canceller.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
- *
- *  Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
- */
-
-#pragma once
-
-#include "noncopyable.h"
-#include "audio/audio_frame_resizer.h"
-#include "audio/resampler.h"
-#include "audio/audiobuffer.h"
-#include "libav_deps.h"
-
-#include <atomic>
-#include <memory>
-
-namespace jami {
-
-class EchoCanceller
-{
-private:
-    NON_COPYABLE(EchoCanceller);
-
-public:
-    EchoCanceller(AudioFormat format, unsigned frameSize)
-        : playbackQueue_(format, frameSize)
-        , recordQueue_(format, frameSize)
-        , resampler_(new Resampler)
-        , format_(format)
-        , frameSize_(frameSize)
-    {}
-    virtual ~EchoCanceller() = default;
-
-    virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf)
-    {
-        recordStarted_ = true;
-        if (!playbackStarted_)
-            return;
-        enqueue(recordQueue_, std::move(buf));
-    };
-    virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf)
-    {
-        playbackStarted_ = true;
-        if (!recordStarted_)
-            return;
-        auto copy = buf;
-        enqueue(playbackQueue_, std::move(copy));
-    };
-    virtual std::shared_ptr<AudioFrame> getProcessed() = 0;
-    virtual void done() = 0;
-
-protected:
-    AudioFrameResizer playbackQueue_;
-    AudioFrameResizer recordQueue_;
-    std::unique_ptr<Resampler> resampler_;
-    std::atomic_bool playbackStarted_;
-    std::atomic_bool recordStarted_;
-    AudioFormat format_;
-    unsigned frameSize_;
-
-private:
-    void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
-    {
-        if (buf->getFormat() != format_) {
-            auto resampled = resampler_->resample(std::move(buf), format_);
-            frameResizer.enqueue(std::move(resampled));
-        } else
-            frameResizer.enqueue(std::move(buf));
-    };
-};
-
-} // namespace jami
diff --git a/src/media/audio/echo-cancel/null_echo_canceller.cpp b/src/media/audio/echo-cancel/null_echo_canceller.cpp
deleted file mode 100644
index b91e0d9b9de6bcb93d886bdec787c47bbc62fa20..0000000000000000000000000000000000000000
--- a/src/media/audio/echo-cancel/null_echo_canceller.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
- *
- *  Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
- */
-
-#include "null_echo_canceller.h"
-
-#include <cassert>
-
-namespace jami {
-
-NullEchoCanceller::NullEchoCanceller(AudioFormat format, unsigned frameSize)
-    : EchoCanceller(format, frameSize)
-{}
-
-void
-NullEchoCanceller::putRecorded(std::shared_ptr<AudioFrame>&& buf)
-{
-    EchoCanceller::putRecorded(std::move(buf));
-};
-
-void
-NullEchoCanceller::putPlayback(const std::shared_ptr<AudioFrame>& buf)
-{
-    EchoCanceller::putPlayback(buf);
-};
-
-std::shared_ptr<AudioFrame>
-NullEchoCanceller::getProcessed()
-{
-    while (recordQueue_.samples() > recordQueue_.frameSize() * 10) {
-        JAMI_DBG("record overflow %d / %d", recordQueue_.samples(), frameSize_);
-        recordQueue_.dequeue();
-    }
-    while (playbackQueue_.samples() > playbackQueue_.frameSize() * 10) {
-        JAMI_DBG("playback overflow %d / %d", playbackQueue_.samples(), frameSize_);
-        playbackQueue_.dequeue();
-    }
-    if (recordQueue_.samples() < recordQueue_.frameSize()
-        || playbackQueue_.samples() < playbackQueue_.frameSize()) {
-        JAMI_DBG("underflow rec: %d, play: %d fs: %d",
-                 recordQueue_.samples(),
-                 playbackQueue_.samples(),
-                 frameSize_);
-        return {};
-    }
-
-    JAMI_WARN("Processing %d samples, rec: %d, play: %d ",
-              frameSize_,
-              recordQueue_.samples(),
-              playbackQueue_.samples());
-    playbackQueue_.dequeue();
-    return recordQueue_.dequeue();
-};
-
-void NullEchoCanceller::done() {};
-
-} // namespace jami
diff --git a/src/media/audio/echo-cancel/speex_echo_canceller.cpp b/src/media/audio/echo-cancel/speex_echo_canceller.cpp
deleted file mode 100644
index 334f18f5cd3e5de63840daba2d53484d78057adb..0000000000000000000000000000000000000000
--- a/src/media/audio/echo-cancel/speex_echo_canceller.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
- *
- *  Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
- */
-
-#include "speex_echo_canceller.h"
-
-#include "audio/audiolayer.h"
-
-extern "C" {
-#include <speex/speex_echo.h>
-#include <speex/speex_preprocess.h>
-}
-
-namespace jami {
-
-struct SpeexEchoCanceller::SpeexEchoStateImpl
-{
-    using SpeexEchoStatePtr = std::unique_ptr<SpeexEchoState, void (*)(SpeexEchoState*)>;
-    SpeexEchoStatePtr state;
-
-    SpeexEchoStateImpl(AudioFormat format, unsigned frameSize)
-        : state(speex_echo_state_init_mc(frameSize,
-                                         frameSize * 16,
-                                         format.nb_channels,
-                                         format.nb_channels),
-                &speex_echo_state_destroy)
-    {
-        int sr = format.sample_rate;
-        speex_echo_ctl(state.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &sr);
-    }
-};
-
-SpeexEchoCanceller::SpeexEchoCanceller(AudioFormat format, unsigned frameSize)
-    : EchoCanceller(format, frameSize)
-    , pimpl_(std::make_unique<SpeexEchoStateImpl>(format, frameSize))
-{
-    speex_echo_ctl(pimpl_->state.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
-}
-
-void
-SpeexEchoCanceller::putRecorded(std::shared_ptr<AudioFrame>&& buf)
-{
-    EchoCanceller::putRecorded(std::move(buf));
-}
-
-void
-SpeexEchoCanceller::putPlayback(const std::shared_ptr<AudioFrame>& buf)
-{
-    EchoCanceller::putPlayback(buf);
-}
-
-std::shared_ptr<AudioFrame>
-SpeexEchoCanceller::getProcessed()
-{
-    if (playbackQueue_.samples() < playbackQueue_.frameSize()
-        or recordQueue_.samples() < recordQueue_.frameSize()) {
-        JAMI_DBG("getRecorded underflow %d / %d, %d / %d",
-                 playbackQueue_.samples(),
-                 playbackQueue_.frameSize(),
-                 recordQueue_.samples(),
-                 recordQueue_.frameSize());
-        return {};
-    }
-    if (recordQueue_.samples() > 2 * recordQueue_.frameSize() && playbackQueue_.samples() == 0) {
-        JAMI_DBG("getRecorded PLAYBACK underflow");
-        return recordQueue_.dequeue();
-    }
-    while (playbackQueue_.samples() > 10 * playbackQueue_.frameSize()) {
-        JAMI_DBG("getRecorded record underflow");
-        playbackQueue_.dequeue();
-    }
-    while (recordQueue_.samples() > 4 * recordQueue_.frameSize()) {
-        JAMI_DBG("getRecorded playback underflow");
-        recordQueue_.dequeue();
-    }
-    auto playback = playbackQueue_.dequeue();
-    auto record = recordQueue_.dequeue();
-    if (playback and record) {
-        auto ret = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
-        speex_echo_cancellation(pimpl_->state.get(),
-                                (const int16_t*) record->pointer()->data[0],
-                                (const int16_t*) playback->pointer()->data[0],
-                                (int16_t*) ret->pointer()->data[0]);
-        return ret;
-    }
-    return {};
-}
-
-void
-SpeexEchoCanceller::done()
-{}
-
-} // namespace jami
diff --git a/src/media/audio/echo-cancel/webrtc_echo_canceller.cpp b/src/media/audio/echo-cancel/webrtc_echo_canceller.cpp
deleted file mode 100644
index ee2ecaf732ed2db7c02b6f8fb914ee466eaba44f..0000000000000000000000000000000000000000
--- a/src/media/audio/echo-cancel/webrtc_echo_canceller.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- *  Copyright (C) 2021-2022 Savoir-faire Linux Inc.
- *
- *  Author: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
- */
-
-#include "webrtc_echo_canceller.h"
-
-#include <webrtc/modules/audio_processing/include/audio_processing.h>
-
-namespace jami {
-
-WebRTCEchoCanceller::WebRTCEchoCanceller(AudioFormat format, unsigned frameSize)
-    : EchoCanceller(format, frameSize)
-    , pimpl_(std::make_unique<WebRTCAPMImpl>(format, frameSize))
-    , fRecordBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
-    , fPlaybackBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
-    , iRecordBuffer_(frameSize_, format)
-    , iPlaybackBuffer_(frameSize_, format)
-{}
-
-struct WebRTCEchoCanceller::WebRTCAPMImpl
-{
-    using APMPtr = std::unique_ptr<webrtc::AudioProcessing>;
-    APMPtr apm;
-    webrtc::StreamConfig streamConfig;
-
-    WebRTCAPMImpl(AudioFormat format, unsigned)
-        : streamConfig(format.sample_rate, format.nb_channels)
-    {
-        webrtc::ProcessingConfig pconfig;
-        webrtc::Config config;
-
-        config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
-        config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
-
-        apm.reset(webrtc::AudioProcessing::Create(config));
-
-        pconfig = {
-            streamConfig, /* input stream */
-            streamConfig, /* output stream */
-            streamConfig, /* reverse input stream */
-            streamConfig, /* reverse output stream */
-        };
-
-        if (apm->Initialize(pconfig) != webrtc::AudioProcessing::kNoError) {
-            JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
-        }
-
-        // aec
-        apm->echo_cancellation()->set_suppression_level(
-            webrtc::EchoCancellation::SuppressionLevel::kModerateSuppression);
-        apm->echo_cancellation()->enable_drift_compensation(true);
-        apm->echo_cancellation()->Enable(true);
-
-        // hpf
-        apm->high_pass_filter()->Enable(true);
-
-        // ns
-        apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kHigh);
-        apm->noise_suppression()->Enable(true);
-
-        // agc
-        apm->gain_control()->set_analog_level_limits(0, 255);
-        apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
-        apm->gain_control()->Enable(true);
-    }
-};
-
-void
-WebRTCEchoCanceller::putRecorded(std::shared_ptr<AudioFrame>&& buf)
-{
-    EchoCanceller::putRecorded(std::move(buf));
-}
-
-void
-WebRTCEchoCanceller::putPlayback(const std::shared_ptr<AudioFrame>& buf)
-{
-    EchoCanceller::putPlayback(buf);
-}
-
-std::shared_ptr<AudioFrame>
-WebRTCEchoCanceller::getProcessed()
-{
-    while (recordQueue_.samples() > recordQueue_.frameSize() * 10) {
-        JAMI_DBG("record overflow %d / %d", recordQueue_.samples(), frameSize_);
-        recordQueue_.dequeue();
-    }
-    while (playbackQueue_.samples() > playbackQueue_.frameSize() * 10) {
-        JAMI_DBG("playback overflow %d / %d", playbackQueue_.samples(), frameSize_);
-        playbackQueue_.dequeue();
-    }
-    if (recordQueue_.samples() < recordQueue_.frameSize()
-        || playbackQueue_.samples() < playbackQueue_.frameSize()) {
-        // If there are not enough samples in either queue, we can't
-        // process anything.
-        // JAMI_DBG("underrun p:%d / r:%d", playbackQueue_.samples(), recordQueue_.samples());
-        return {};
-    }
-
-    int driftSamples = playbackQueue_.samples() - recordQueue_.samples();
-
-    auto playback = playbackQueue_.dequeue();
-    auto record = recordQueue_.dequeue();
-    if (!playback || !record)
-        return {};
-
-    auto processed = std::make_shared<AudioFrame>(format_, frameSize_);
-
-    webrtc::StreamConfig& sc = pimpl_->streamConfig;
-
-    // analyze deinterleaved float playback data
-    iPlaybackBuffer_.deinterleave((const AudioSample*) playback->pointer()->data[0],
-                                  frameSize_,
-                                  format_.nb_channels);
-    std::vector<float*> playData {format_.nb_channels};
-    for (unsigned c = 0; c < format_.nb_channels; ++c) {
-        playData[c] = fPlaybackBuffer_[c].data();
-        iPlaybackBuffer_.channelToFloat(playData[c], c);
-    }
-    if (pimpl_->apm->ProcessReverseStream(playData.data(), sc, sc, playData.data())
-        != webrtc::AudioProcessing::kNoError)
-        JAMI_ERR("[webrtc-ap] ProcessReverseStream failed");
-
-    // process deinterleaved float recorded data
-    iRecordBuffer_.deinterleave((const AudioSample*) record->pointer()->data[0],
-                                frameSize_,
-                                format_.nb_channels);
-    std::vector<float*> recData {format_.nb_channels};
-    for (unsigned c = 0; c < format_.nb_channels; ++c) {
-        recData[c] = fRecordBuffer_[c].data();
-        iRecordBuffer_.channelToFloat(recData[c], c);
-    }
-    // TODO: implement this correctly (it MUST be called prior to ProcessStream)
-    // delay = (t_render - t_analyze) + (t_process - t_capture)
-    pimpl_->apm->set_stream_delay_ms(0);
-    pimpl_->apm->gain_control()->set_stream_analog_level(analogLevel_);
-    pimpl_->apm->echo_cancellation()->set_stream_drift_samples(driftSamples);
-    if (pimpl_->apm->ProcessStream(recData.data(), sc, sc, recData.data())
-        != webrtc::AudioProcessing::kNoError)
-        JAMI_ERR("[webrtc-ap] ProcessStream failed");
-    analogLevel_ = pimpl_->apm->gain_control()->stream_analog_level();
-
-    // return interleaved s16 data
-    iRecordBuffer_.convertFloatPlanarToSigned16((uint8_t**) recData.data(),
-                                                frameSize_,
-                                                format_.nb_channels);
-    iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
-    return processed;
-}
-
-void
-WebRTCEchoCanceller::done()
-{}
-
-} // namespace jami
diff --git a/src/media/audio/pulseaudio/pulselayer.cpp b/src/media/audio/pulseaudio/pulselayer.cpp
index d2a6795372f9788b20ca82e65c764eaf1698ae6a..134aa7ebc60caee9660ac4147c2b96e6b1446f91 100644
--- a/src/media/audio/pulseaudio/pulselayer.cpp
+++ b/src/media/audio/pulseaudio/pulselayer.cpp
@@ -67,6 +67,7 @@ PulseLayer::PulseLayer(AudioPreference& pref)
     , mainloop_(pa_threaded_mainloop_new(), pa_threaded_mainloop_free)
     , preference_(pref)
 {
+    JAMI_INFO("[audiolayer] created pulseaudio layer");
     if (!mainloop_)
         throw std::runtime_error("Couldn't create pulseaudio mainloop");
 
@@ -118,6 +119,9 @@ PulseLayer::~PulseLayer()
 
     if (subscribeOp_)
         pa_operation_unref(subscribeOp_);
+
+    playbackChanged(false);
+    recordChanged(false);
 }
 
 void
@@ -131,8 +135,8 @@ PulseLayer::context_state_callback(pa_context* c, void* user_data)
 void
 PulseLayer::contextStateChanged(pa_context* c)
 {
-    const pa_subscription_mask_t mask = (pa_subscription_mask_t)(PA_SUBSCRIPTION_MASK_SINK
-                                                                 | PA_SUBSCRIPTION_MASK_SOURCE);
+    const pa_subscription_mask_t mask = (pa_subscription_mask_t) (PA_SUBSCRIPTION_MASK_SINK
+                                                                  | PA_SUBSCRIPTION_MASK_SOURCE);
 
     switch (pa_context_get_state(c)) {
     case PA_CONTEXT_CONNECTING:
@@ -367,12 +371,17 @@ PulseLayer::onStreamReady()
         // called is to notify a new event
         flushUrgent();
         flushMain();
-        if (playback_)
+        if (playback_) {
             playback_->start();
-        if (ringtone_)
+            playbackChanged(true);
+        }
+        if (ringtone_) {
             ringtone_->start();
-        if (record_)
+        }
+        if (record_) {
             record_->start();
+            recordChanged(true);
+        }
     }
 }
 
@@ -411,6 +420,8 @@ PulseLayer::disconnectAudioStream()
     playback_.reset();
     ringtone_.reset();
     record_.reset();
+    playbackChanged(false);
+    recordChanged(false);
     pendingStreams = 0;
     status_ = Status::Idle;
     startedCv_.notify_all();
diff --git a/src/media/audio/resampler.cpp b/src/media/audio/resampler.cpp
index 1c4f3f95b23a261ed6c8b9334f4642190b5c24fe..b97a21ae60024c867c3a68a3b2f8f9e4a25a261d 100644
--- a/src/media/audio/resampler.cpp
+++ b/src/media/audio/resampler.cpp
@@ -175,6 +175,7 @@ Resampler::resample(std::unique_ptr<AudioFrame>&& in, const AudioFormat& format)
     }
     auto output = std::make_unique<AudioFrame>(format);
     resample(in->pointer(), output->pointer());
+    output->has_voice = in->has_voice;
     return output;
 }
 
@@ -198,6 +199,7 @@ Resampler::resample(std::shared_ptr<AudioFrame>&& in, const AudioFormat& format)
     auto output = std::make_shared<AudioFrame>(format);
     if (auto outPtr = output->pointer()) {
         resample(inPtr, outPtr);
+        output->has_voice = in->has_voice;
         return output;
     }
     return {};
diff --git a/src/media/audio/ringbufferpool.cpp b/src/media/audio/ringbufferpool.cpp
index 2fecf9dfc08ce9f4244635661ee1949e6f7152c7..3d939d2899d3867d13811e725becc1c512027c52 100644
--- a/src/media/audio/ringbufferpool.cpp
+++ b/src/media/audio/ringbufferpool.cpp
@@ -299,6 +299,9 @@ RingBufferPool::getData(const std::string& call_id)
         if (auto b = rbuf->get(call_id)) {
             mixed = true;
             mixBuffer->mix(*b);
+
+            // voice is true if any of mixed frames has voice
+            mixBuffer->has_voice |= b->has_voice;
         }
     }
 
@@ -352,8 +355,12 @@ RingBufferPool::getAvailableData(const std::string& call_id)
 
     auto buf = std::make_shared<AudioFrame>(internalAudioFormat_);
     for (const auto& rbuf : *bindings) {
-        if (auto b = rbuf->get(call_id))
+        if (auto b = rbuf->get(call_id)) {
             buf->mix(*b);
+
+            // voice is true if any of mixed frames has voice
+            buf->has_voice |= b->has_voice;
+        }
     }
 
     return buf;
diff --git a/src/meson.build b/src/meson.build
index dc1369710c1b183b64b3dfb511d6ccc7933eed38..db94f87b4e5f7cabc43e6dae6c29176560fe664a 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -41,7 +41,7 @@ libjami_sources = files(
     'jamidht/sync_channel_handler.cpp',
     'jamidht/sync_module.cpp',
     'jamidht/transfer_channel_handler.cpp',
-    'media/audio/echo-cancel/null_echo_canceller.cpp',
+    'media/audio/audio-processing/null_audio_processor.cpp',
     'media/audio/sound/audiofile.cpp',
     'media/audio/sound/dtmf.cpp',
     'media/audio/sound/dtmfgenerator.cpp',
@@ -228,11 +228,18 @@ endif
 
 if conf.get('HAVE_WEBRTC_AP') == 1
     libjami_sources += files(
-        'media/audio/echo-cancel/webrtc_echo_canceller.cpp'
+        'media/audio/audio-processing/webrtc.cpp'
     )
     libjami_dependencies += depwebrtcap
 endif
 
+if conf.get('HAVE_SPEEXDSP') == 1
+    libjami_sources += files(
+        'media/audio/audio-processing/speex.cpp'
+    )
+    libjami_dependencies += depspeexdsp
+endif
+
 if conf.get('ENABLE_VIDEO')
     libjami_sources += files(
         'media/video/filter_transpose.cpp',