diff --git a/src/media/audio/audio-processing/audio_processor.h b/src/media/audio/audio-processing/audio_processor.h
index 07a8800a7c390d3e4fd464584a8f0dfbaad56463..97cbedb2d041ba58560b71d316aeb24cfb645f40 100644
--- a/src/media/audio/audio-processing/audio_processor.h
+++ b/src/media/audio/audio-processing/audio_processor.h
@@ -180,8 +180,7 @@ private:
     void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
     {
         if (buf->getFormat() != format_) {
-            auto resampled = resampler_->resample(std::move(buf), format_);
-            frameResizer.enqueue(std::move(resampled));
+            frameResizer.enqueue(resampler_->resample(std::move(buf), format_));
         } else
            frameResizer.enqueue(std::move(buf));
    };
diff --git a/src/media/audio/audio-processing/webrtc.cpp b/src/media/audio/audio-processing/webrtc.cpp
index bea3205616ccf2c3c73e9534c9a5a1e2edfcc9b2..ea6c48fadd5d2ff264c970e7c1f129634dd87833 100644
--- a/src/media/audio/audio-processing/webrtc.cpp
+++ b/src/media/audio/audio-processing/webrtc.cpp
@@ -23,17 +23,28 @@
 
 namespace jami {
 
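+// The WebRTC audio processing module (APM) operates on deinterleaved float
+// (planar) samples in fixed 10 ms chunks (kChunkSizeMs), so the working
+// format and frame size are derived from the device format once, up front.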
+inline AudioFormat
+audioFormatToFloatPlanar(AudioFormat format)
+{
+    return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_FLTP};
+}
+
+inline size_t
+webrtcFrameSize(AudioFormat format)
+{
+    return (size_t) (webrtc::AudioProcessing::kChunkSizeMs * format.sample_rate / 1000);
+}
+
 constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError;
 
-WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSize)
-    : AudioProcessor(format, frameSize)
-    , fRecordBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
-    , fPlaybackBuffer_(format.nb_channels, std::vector<float>(frameSize_, 0))
-    , iRecordBuffer_(frameSize_, format)
-    , iPlaybackBuffer_(frameSize_, format)
+WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned /* frameSize */)
+    : AudioProcessor(audioFormatToFloatPlanar(format), webrtcFrameSize(format))
 {
-    JAMI_DBG("[webrtc-ap] WebRTCAudioProcessor, frame size = %d (=%d ms), channels = %d",
-             frameSize,
+    JAMI_LOG("[webrtc-ap] WebRTCAudioProcessor, frame size = {:d} (={:d} ms), channels = {:d}",
+             frameSize_,
              frameDurationMs_,
              format.nb_channels);
     webrtc::Config config;
@@ -51,74 +62,72 @@ WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSiz
     };
 
     if (apm->Initialize(pconfig) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
+        JAMI_ERROR("[webrtc-ap] Error initialising audio processing module");
     }
-
-    JAMI_INFO("[webrtc-ap] Done initializing");
 }
 
 void
 WebRTCAudioProcessor::enableNoiseSuppression(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableNoiseSuppression %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableNoiseSuppression {}", enabled);
     if (apm->noise_suppression()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling noise suppression");
+        JAMI_ERROR("[webrtc-ap] Error enabling noise suppression");
     }
     if (apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kVeryHigh) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting noise suppression level");
+        JAMI_ERROR("[webrtc-ap] Error setting noise suppression level");
     }
     if (apm->high_pass_filter()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling high pass filter");
+        JAMI_ERROR("[webrtc-ap] Error enabling high pass filter");
     }
 }
 
 void
 WebRTCAudioProcessor::enableAutomaticGainControl(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableAutomaticGainControl %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableAutomaticGainControl {}", enabled);
     if (apm->gain_control()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling automatic gain control");
+        JAMI_ERROR("[webrtc-ap] Error enabling automatic gain control");
     }
     if (apm->gain_control()->set_analog_level_limits(0, 255) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting automatic gain control analog level limits");
+        JAMI_ERROR("[webrtc-ap] Error setting automatic gain control analog level limits");
     }
     if (apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting automatic gain control mode");
+        JAMI_ERROR("[webrtc-ap] Error setting automatic gain control mode");
     }
 }
 
 void
 WebRTCAudioProcessor::enableEchoCancel(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableEchoCancel %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableEchoCancel {}", enabled);
     if (apm->echo_cancellation()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling echo cancellation");
+        JAMI_ERROR("[webrtc-ap] Error enabling echo cancellation");
     }
     if (apm->echo_cancellation()->set_suppression_level(
             webrtc::EchoCancellation::SuppressionLevel::kHighSuppression)
         != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting echo cancellation level");
+        JAMI_ERROR("[webrtc-ap] Error setting echo cancellation level");
     }
     if (apm->echo_cancellation()->enable_drift_compensation(true) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling echo cancellation drift compensation");
+        JAMI_ERROR("[webrtc-ap] Error enabling echo cancellation drift compensation");
     }
 }
 
 void
 WebRTCAudioProcessor::enableVoiceActivityDetection(bool enabled)
 {
-    JAMI_DBG("[webrtc-ap] enableVoiceActivityDetection %d", enabled);
+    JAMI_LOG("[webrtc-ap] enableVoiceActivityDetection {}", enabled);
     if (apm->voice_detection()->Enable(enabled) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error enabling voice activation detection");
+        JAMI_ERROR("[webrtc-ap] Error enabling voice activity detection");
     }
     if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
         != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
+        JAMI_ERROR("[webrtc-ap] Error setting voice detection likelihood");
     }
     // asserted to be 10 in voice_detection_impl.cc
     if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
-        JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
+        JAMI_ERROR("[webrtc-ap] Error setting voice detection frame size");
     }
 }
 
@@ -136,42 +145,18 @@ WebRTCAudioProcessor::getProcessed()
     if (!playback || !record) {
         return {};
     }
-
-    auto processed = std::make_shared<AudioFrame>(format_, frameSize_);
-
-    // webrtc::StreamConfig& sc = streamConfig;
     webrtc::StreamConfig sc((int) format_.sample_rate, (int) format_.nb_channels);
 
-    // analyze deinterleaved float playback data
-    iPlaybackBuffer_.deinterleave((const AudioSample*) playback->pointer()->data[0],
-                                  frameSize_,
-                                  format_.nb_channels);
-    std::vector<float*> playData {format_.nb_channels};
-    for (unsigned channel = 0; channel < format_.nb_channels; ++channel) {
-        // point playData channel to appropriate data location
-        playData[channel] = fPlaybackBuffer_[channel].data();
-
-        // write playback to playData channel
-        iPlaybackBuffer_.channelToFloat(playData[channel], (int) channel);
-    }
-    // process reverse in place
-    if (apm->ProcessReverseStream(playData.data(), sc, sc, playData.data()) != webrtcNoError) {
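+    // A planar float frame exposes one pointer per channel via extended_data,
+    // which is the float** layout the APM expects, so both streams can be
+    // processed in place with no interleave/deinterleave copies.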
+    float** playData = (float**) playback->pointer()->extended_data;
+    if (apm->ProcessReverseStream(playData, sc, sc, playData) != webrtcNoError) {
         JAMI_ERR("[webrtc-ap] ProcessReverseStream failed");
     }
 
     // process deinterleaved float recorded data
-    iRecordBuffer_.deinterleave((const AudioSample*) record->pointer()->data[0],
-                                frameSize_,
-                                format_.nb_channels);
-    std::vector<float*> recData {format_.nb_channels};
-    for (unsigned int channel = 0; channel < format_.nb_channels; ++channel) {
-        // point recData channel to appropriate data location
-        recData[channel] = fRecordBuffer_[channel].data();
-
-        // write data to recData channel
-        iRecordBuffer_.channelToFloat(recData[channel], (int) channel);
-    }
 
     // TODO: maybe implement this to see if it's better than automatic drift compensation
     // (it MUST be called prior to ProcessStream)
     // delay = (t_render - t_analyze) + (t_process - t_capture)
@@ -185,22 +170,15 @@ WebRTCAudioProcessor::getProcessed()
     apm->echo_cancellation()->set_stream_drift_samples(driftSamples);
 
     // process in place
-    if (apm->ProcessStream(recData.data(), sc, sc, recData.data()) != webrtcNoError) {
+    float** recData = (float**) record->pointer()->extended_data;
+    if (apm->ProcessStream(recData, sc, sc, recData) != webrtcNoError) {
         JAMI_ERR("[webrtc-ap] ProcessStream failed");
     }
 
     analogLevel_ = apm->gain_control()->stream_analog_level();
-
-    // return interleaved s16 data
-    iRecordBuffer_.convertFloatPlanarToSigned16((uint8_t**) recData.data(),
-                                                frameSize_,
-                                                format_.nb_channels);
-    iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
-
-    processed->has_voice = apm->voice_detection()->is_enabled()
+    record->has_voice = apm->voice_detection()->is_enabled()
                            && getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
-
-    return processed;
+    return record;
 }
 
 } // namespace jami
diff --git a/src/media/audio/audio-processing/webrtc.h b/src/media/audio/audio-processing/webrtc.h
index ab847daf3dc966612ca3a9df7fefe32c2e9a3ad0..6ec8f2ef9077d65b12e9214a0a8fd2f25b20deca 100644
--- a/src/media/audio/audio-processing/webrtc.h
+++ b/src/media/audio/audio-processing/webrtc.h
@@ -42,12 +42,6 @@ public:
 
 private:
     std::unique_ptr<webrtc::AudioProcessing> apm;
-
-    using fChannelBuffer = std::vector<std::vector<float>>;
-    fChannelBuffer fRecordBuffer_;
-    fChannelBuffer fPlaybackBuffer_;
-    AudioBuffer iRecordBuffer_;
-    AudioBuffer iPlaybackBuffer_;
     int analogLevel_ {0};
 };
 } // namespace jami