Commit 4a8f2a1d authored by Adrien Béraud, committed by Adrien Béraud

webrtc: use float planar as the input format

Change-Id: I2e7aa160e6d4f04c6ce7af865f8b89b078f2bd2e
parent 9e20b99f
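Note (annotation, not part of the commit): Jami's AudioFrame wraps an FFmpeg AVFrame. "Float planar" is FFmpeg's AV_SAMPLE_FMT_FLTP layout, where each channel occupies its own contiguous float buffer instead of all channels being interleaved in one. That is exactly the shape webrtc::AudioProcessing consumes (a float** with one pointer per channel), which is why the conversion passes and scratch buffers below can be deleted. A minimal sketch of the layout, assuming FFmpeg 5.1+ for the ch_layout API; error handling omitted:

    extern "C" {
    #include <libavutil/frame.h>
    #include <libavutil/channel_layout.h>
    }

    // Build a planar-float frame; each channel gets its own plane, so
    // frame->extended_data can be handed to the APM as a float** directly.
    AVFrame* makeFltpFrame(int sampleRate, int channels, int nbSamples)
    {
        AVFrame* frame = av_frame_alloc();
        frame->format = AV_SAMPLE_FMT_FLTP;                     // planar 32-bit float
        frame->sample_rate = sampleRate;
        frame->nb_samples = nbSamples;
        av_channel_layout_default(&frame->ch_layout, channels); // FFmpeg >= 5.1
        av_frame_get_buffer(frame, 0);                          // allocates one plane per channel
        // frame->extended_data[ch] now points at nbSamples floats for channel ch;
        // no interleave/deinterleave pass is needed before the APM sees it.
        return frame;
    }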
@@ -180,8 +180,7 @@ private:
     void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
     {
         if (buf->getFormat() != format_) {
-            auto resampled = resampler_->resample(std::move(buf), format_);
-            frameResizer.enqueue(std::move(resampled));
+            frameResizer.enqueue(resampler_->resample(std::move(buf), format_));
         } else
             frameResizer.enqueue(std::move(buf));
     };
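Note: this hunk is where the commit's invariant is established. format_ is now a float-planar format (see the next file), so the resampler performs the sample-format conversion on the way into the frame resizer and everything popped out of the queues is already planar float. Roughly, under the names used in the diff (captureFrame() is a hypothetical source):

    // Sketch of the ingest path; AudioFrame/Resampler are Jami types, details assumed.
    std::shared_ptr<AudioFrame> in = captureFrame();         // e.g. s16 interleaved from the device
    if (in->getFormat() != format_)                          // format_ == {rate, channels, AV_SAMPLE_FMT_FLTP}
        in = resampler_->resample(std::move(in), format_);   // rate + layout conversion in one step
    frameResizer.enqueue(std::move(in));                     // the queue only ever holds planar float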
@@ -23,17 +23,25 @@
 namespace jami {

+inline AudioFormat
+audioFormatToFloatPlanar(AudioFormat format)
+{
+    return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_FLTP};
+}
+
+inline size_t
+webrtcFrameSize(AudioFormat format)
+{
+    return (size_t) (webrtc::AudioProcessing::kChunkSizeMs * format.sample_rate / 1000);
+}
+
 constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError;

-WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSize)
-    : AudioProcessor(format, frameSize)
-    , fRecordBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
-    , fPlaybackBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
-    , iRecordBuffer_(frameSize_, format)
-    , iPlaybackBuffer_(frameSize_, format)
+WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned /* frameSize */)
+    : AudioProcessor(audioFormatToFloatPlanar(format), webrtcFrameSize(format))
 {
-    JAMI_DBG("[webrtc-ap] WebRTCAudioProcessor, frame size = %d (=%d ms), channels = %d",
-             frameSize,
+    JAMI_LOG("[webrtc-ap] WebRTCAudioProcessor, frame size = {:d} (={:d} ms), channels = {:d}",
+             frameSize_,
              frameDurationMs_,
              format.nb_channels);
     webrtc::Config config;
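Note: webrtcFrameSize() pins the processor to WebRTC's native 10 ms chunk (webrtc::AudioProcessing::kChunkSizeMs is 10 in upstream WebRTC), which is why the caller-supplied frameSize is now ignored. The arithmetic, as a self-checking sketch:

    #include <cstddef>

    // Mirrors the new helper: samples per channel in one APM chunk.
    constexpr std::size_t chunkSamples(std::size_t chunkMs, std::size_t sampleRate)
    {
        return chunkMs * sampleRate / 1000;
    }

    static_assert(chunkSamples(10, 48000) == 480, "10 ms at 48 kHz");
    static_assert(chunkSamples(10, 16000) == 160, "10 ms at 16 kHz");
    static_assert(chunkSamples(10, 44100) == 441, "10 ms at 44.1 kHz");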
@@ -51,74 +59,72 @@ WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSiz
     };
     if (apm->Initialize(pconfig) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
+        JAMI_ERROR("[webrtc-ap] Error initialising audio processing module");
     }
-    JAMI_INFO("[webrtc-ap] Done initializing");
 }

 void
 WebRTCAudioProcessor::enableNoiseSuppression(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableNoiseSuppression %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableNoiseSuppression {}", enabled);
     if (apm->noise_suppression()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling noise suppression");
+        JAMI_ERROR("[webrtc-ap] Error enabling noise suppression");
     }
     if (apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kVeryHigh) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting noise suppression level");
+        JAMI_ERROR("[webrtc-ap] Error setting noise suppression level");
     }
     if (apm->high_pass_filter()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling high pass filter");
+        JAMI_ERROR("[webrtc-ap] Error enabling high pass filter");
     }
 }

 void
 WebRTCAudioProcessor::enableAutomaticGainControl(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableAutomaticGainControl %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableAutomaticGainControl {}", enabled);
     if (apm->gain_control()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling automatic gain control");
+        JAMI_ERROR("[webrtc-ap] Error enabling automatic gain control");
     }
     if (apm->gain_control()->set_analog_level_limits(0, 255) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting automatic gain control analog level limits");
+        JAMI_ERROR("[webrtc-ap] Error setting automatic gain control analog level limits");
     }
     if (apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting automatic gain control mode");
+        JAMI_ERROR("[webrtc-ap] Error setting automatic gain control mode");
     }
 }

 void
 WebRTCAudioProcessor::enableEchoCancel(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableEchoCancel %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableEchoCancel {}", enabled);
     if (apm->echo_cancellation()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling echo cancellation");
+        JAMI_ERROR("[webrtc-ap] Error enabling echo cancellation");
     }
     if (apm->echo_cancellation()->set_suppression_level(
             webrtc::EchoCancellation::SuppressionLevel::kHighSuppression)
         != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting echo cancellation level");
+        JAMI_ERROR("[webrtc-ap] Error setting echo cancellation level");
     }
     if (apm->echo_cancellation()->enable_drift_compensation(true) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling echo cancellation drift compensation");
+        JAMI_ERROR("[webrtc-ap] Error enabling echo cancellation drift compensation");
     }
 }

 void
 WebRTCAudioProcessor::enableVoiceActivityDetection(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableVoiceActivityDetection %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableVoiceActivityDetection {}", enabled);
     if (apm->voice_detection()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling voice activation detection");
+        JAMI_ERROR("[webrtc-ap] Error enabling voice activation detection");
     }
     if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
         != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
+        JAMI_ERROR("[webrtc-ap] Error setting voice detection likelihood");
     }
     // asserted to be 10 in voice_detection_impl.cc
     if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
+        JAMI_ERROR("[webrtc-ap] Error setting voice detection frame size");
     }
 }
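Note: taken together, a call site now only chooses its native capture format; conversion to float planar happens inside. A hypothetical usage sketch (constructor arguments per this diff; AudioFormat's two-argument construction is assumed, not shown in the hunks):

    jami::AudioFormat fmt {48000, 2};             // 48 kHz stereo, device-native (assumed ctor)
    jami::WebRTCAudioProcessor processor(fmt, 0); // second argument is now ignored
    processor.enableEchoCancel(true);
    processor.enableNoiseSuppression(true);
    processor.enableAutomaticGainControl(true);
    processor.enableVoiceActivityDetection(true);
    // feed frames through the base-class queues; getProcessed() yields 10 ms FLTP frames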
@@ -136,42 +142,15 @@ WebRTCAudioProcessor::getProcessed()
     if (!playback || !record) {
         return {};
     }

-    auto processed = std::make_shared<AudioFrame>(format_, frameSize_);
-
-    // webrtc::StreamConfig& sc = streamConfig;
     webrtc::StreamConfig sc((int) format_.sample_rate, (int) format_.nb_channels);

-    // analyze deinterleaved float playback data
-    iPlaybackBuffer_.deinterleave((const AudioSample*) playback->pointer()->data[0],
-                                  frameSize_,
-                                  format_.nb_channels);
-    std::vector<float*> playData {format_.nb_channels};
-    for (unsigned channel = 0; channel < format_.nb_channels; ++channel) {
-        // point playData channel to appropriate data location
-        playData[channel] = fPlaybackBuffer_[channel].data();
-
-        // write playback to playData channel
-        iPlaybackBuffer_.channelToFloat(playData[channel], (int) channel);
-    }
-
     // process reverse in place
-    if (apm->ProcessReverseStream(playData.data(), sc, sc, playData.data()) != webrtcNoError) {
+    float** playData = (float**) playback->pointer()->extended_data;
+    if (apm->ProcessReverseStream(playData, sc, sc, playData) != webrtcNoError) {
         JAMI_ERR("[webrtc-ap] ProcessReverseStream failed");
     }

     // process deinterleaved float recorded data
-    iRecordBuffer_.deinterleave((const AudioSample*) record->pointer()->data[0],
-                                frameSize_,
-                                format_.nb_channels);
-    std::vector<float*> recData {format_.nb_channels};
-    for (unsigned int channel = 0; channel < format_.nb_channels; ++channel) {
-        // point recData channel to appropriate data location
-        recData[channel] = fRecordBuffer_[channel].data();
-
-        // write data to recData channel
-        iRecordBuffer_.channelToFloat(recData[channel], (int) channel);
-    }
-
     // TODO: maybe implement this to see if it's better than automatic drift compensation
     // (it MUST be called prior to ProcessStream)
     // delay = (t_render - t_analyze) + (t_process - t_capture)
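Note on the TODO above: WebRTC does expose manual delay reporting via AudioProcessing::set_stream_delay_ms(), which must be called before every ProcessStream(). A sketch of what the quoted formula would look like, assuming the audio layer can measure the four timestamps (in ms); the helper name is hypothetical:

    #include <cstdint>

    // t_analyze: far-end frame handed to ProcessReverseStream()
    // t_render:  the same frame actually reaching the speaker
    // t_capture: near-end frame read from the microphone
    // t_process: the same frame handed to ProcessStream()
    void reportStreamDelay(webrtc::AudioProcessing& apm,
                           int64_t t_render, int64_t t_analyze,
                           int64_t t_capture, int64_t t_process)
    {
        int delayMs = static_cast<int>((t_render - t_analyze) + (t_process - t_capture));
        apm.set_stream_delay_ms(delayMs); // must precede each ProcessStream() call
    }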
@@ -185,22 +164,15 @@ WebRTCAudioProcessor::getProcessed()
     apm->echo_cancellation()->set_stream_drift_samples(driftSamples);

     // process in place
-    if (apm->ProcessStream(recData.data(), sc, sc, recData.data()) != webrtcNoError) {
+    float** recData = (float**) record->pointer()->extended_data;
+    if (apm->ProcessStream(recData, sc, sc, recData) != webrtcNoError) {
         JAMI_ERR("[webrtc-ap] ProcessStream failed");
     }

     analogLevel_ = apm->gain_control()->stream_analog_level();

-    // return interleaved s16 data
-    iRecordBuffer_.convertFloatPlanarToSigned16((uint8_t**) recData.data(),
-                                                frameSize_,
-                                                format_.nb_channels);
-    iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
-
-    processed->has_voice = apm->voice_detection()->is_enabled()
+    record->has_voice = apm->voice_detection()->is_enabled()
                            && getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
-
-    return processed;
+    return record;
 }

 } // namespace jami
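Note: the raw float** casts above are safe only because of the invariant established in enqueue(): every frame reaching getProcessed() was resampled to format_, which this commit fixes to AV_SAMPLE_FMT_FLTP. Processing in place on the record frame also removes the per-call allocation and interleave copy of the old `processed` frame. A defensive sketch of that invariant (member names as in the diff):

    #include <cassert>

    // Inside getProcessed(), before the cast (sketch):
    AVFrame* av = record->pointer();
    assert(av->format == AV_SAMPLE_FMT_FLTP);         // guaranteed by enqueue()'s resample
    assert((unsigned) av->nb_samples == frameSize_);  // exactly one 10 ms APM chunk
    float** recData = reinterpret_cast<float**>(av->extended_data);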
@@ -42,12 +42,6 @@ public:
 private:
     std::unique_ptr<webrtc::AudioProcessing> apm;

-    using fChannelBuffer = std::vector<std::vector<float>>;
-    fChannelBuffer fRecordBuffer_;
-    fChannelBuffer fPlaybackBuffer_;
-    AudioBuffer iRecordBuffer_;
-    AudioBuffer iPlaybackBuffer_;
-
     int analogLevel_ {0};
 };

 } // namespace jami