Commit aa9a39e2 authored by Tobias Hildebrandt, committed by Adrien Béraud

audio: integrate audio processor preferences

* add check for PulseAudio system echo cancel module
* audio processor preferences can currently only be changed in dring.yml
  * not yet integrated into daemon/client signaling system

Gitlab: #692
Change-Id: I57af6e844acbbfdb5a78d95a87a98873757c506d
parent 869c3fe9
Showing with 244 additions and 77 deletions
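As the commit message notes, these preferences can currently only be changed by editing dring.yml by hand. A sketch of the relevant keys under the existing "audio" section, using the key names and defaults introduced in the preferences.cpp and preferences.h hunks below (surrounding keys omitted):

audio:
  audioProcessor: webrtc          # one of: webrtc, speex, null
  automaticGainControl: false
  voiceActivityDetection: true
  noiseReduce: false
  echoCancel: auto                # one of: auto, system, audioProcessor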
@@ -82,6 +82,11 @@ public:
*/
virtual void enableAutomaticGainControl(bool enabled) = 0;
/**
* @brief Set the status of voice activity detection
*/
virtual void enableVoiceActivityDetection(bool enabled) = 0;
protected:
AudioFrameResizer playbackQueue_;
AudioFrameResizer recordQueue_;
......
@@ -35,6 +35,8 @@ public:
void enableNoiseSuppression(bool) override {};
void enableAutomaticGainControl(bool) override {};
void enableVoiceActivityDetection(bool) override {};
};
} // namespace jami
@@ -152,6 +152,21 @@ SpeexAudioProcessor::enableAutomaticGainControl(bool enabled)
}
}
void
SpeexAudioProcessor::enableVoiceActivityDetection(bool enabled)
{
JAMI_DBG("[speex-dsp] enableVoiceActivityDetection %d", enabled);
shouldDetectVoice = enabled;
spx_int32_t speexSetValue = (spx_int32_t) enabled;
for (auto& channelPreprocessorState : preprocessorStates) {
speex_preprocess_ctl(channelPreprocessorState.get(),
SPEEX_PREPROCESS_SET_VAD,
&speexSetValue);
}
}
std::shared_ptr<AudioFrame>
SpeexAudioProcessor::getProcessed()
{
@@ -212,7 +227,7 @@ SpeexAudioProcessor::getProcessed()
iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);
// add stabilized voice activity to the AudioFrame
processed->has_voice = getStabilizedVoiceActivity(overallVad);
processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);
return processed;
}
......
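The shouldDetectVoice flag above is needed because speex_preprocess_run() reports "voice detected" unconditionally while VAD is off, so the daemon must remember whether the return value is meaningful. A minimal standalone sketch of the underlying speexdsp calls (assuming 48 kHz mono and the 20 ms frame size createAudioProcessor() picks for Speex; an illustration, not the daemon's code):

#include <speex/speex_preprocess.h>
#include <vector>

// Returns the VAD decision for one 20 ms frame of 48 kHz mono audio (960 samples).
bool frameHasVoice(std::vector<spx_int16_t>& frame)
{
    SpeexPreprocessState* state = speex_preprocess_state_init((int) frame.size(), 48000);

    // same control call as enableVoiceActivityDetection() above
    spx_int32_t on = 1;
    speex_preprocess_ctl(state, SPEEX_PREPROCESS_SET_VAD, &on);

    // with VAD enabled the return value is the voice-activity decision;
    // with VAD disabled it is always 1, hence the shouldDetectVoice guard
    int hasVoice = speex_preprocess_run(state, frame.data()); // modifies frame in place

    speex_preprocess_state_destroy(state);
    return hasVoice == 1;
}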
@@ -43,6 +43,7 @@ public:
void enableEchoCancel(bool enabled) override;
void enableNoiseSuppression(bool enabled) override;
void enableAutomaticGainControl(bool enabled) override;
void enableVoiceActivityDetection(bool enabled) override;
private:
using SpeexEchoStatePtr = std::unique_ptr<SpeexEchoState, void (*)(SpeexEchoState*)>;
@@ -59,5 +60,9 @@ private:
// if we should do echo cancellation
bool shouldAEC {false};
// if we should do voice activity detection
// preprocess_run returns 1 if vad is disabled, so we have to know whether or not to ignore it
bool shouldDetectVoice {false};
};
} // namespace jami
@@ -53,20 +53,6 @@ WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSiz
JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
}
// voice activity
if (apm->voice_detection()->Enable(true) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error enabling voice detection");
}
// TODO: change likelihood?
if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
!= webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
}
// asserted to be 10 in voice_detection_impl.cc
if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
}
JAMI_INFO("[webrtc-ap] Done initializing");
}
@@ -118,6 +104,23 @@ WebRTCAudioProcessor::enableEchoCancel(bool enabled)
}
}
void
WebRTCAudioProcessor::enableVoiceActivityDetection(bool enabled)
{
JAMI_DBG("[webrtc-ap] enableVoiceActivityDetection %d", enabled);
if (apm->voice_detection()->Enable(enabled) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error enabling voice activation detection");
}
if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
!= webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
}
// asserted to be 10 in voice_detection_impl.cc
if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
}
}
std::shared_ptr<AudioFrame>
WebRTCAudioProcessor::getProcessed()
{
@@ -193,7 +196,8 @@ WebRTCAudioProcessor::getProcessed()
format_.nb_channels);
iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);
processed->has_voice = getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
processed->has_voice = apm->voice_detection()->is_enabled()
&& getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
return processed;
}
......
@@ -38,6 +38,7 @@ public:
void enableEchoCancel(bool enabled) override;
void enableNoiseSuppression(bool enabled) override;
void enableAutomaticGainControl(bool enabled) override;
void enableVoiceActivityDetection(bool enabled) override;
private:
std::unique_ptr<webrtc::AudioProcessing> apm;
......
@@ -28,13 +28,12 @@
#include "tonecontrol.h"
#include "client/ring_signal.h"
// TODO: decide which library to use/how to decide (compile time? runtime?)
#include "audio-processing/null_audio_processor.h"
#if HAVE_WEBRTC_AP
#include "audio-processing/webrtc.h"
#elif HAVE_SPEEXDSP
#endif
#if HAVE_SPEEXDSP
#include "audio-processing/speex.h"
#else
#include "audio-processing/null_audio_processor.h"
#endif
#include <ctime>
@@ -54,8 +53,16 @@ AudioLayer::AudioLayer(const AudioPreference& pref)
, urgentRingBuffer_("urgentRingBuffer_id", SIZEBUF, audioFormat_)
, resampler_(new Resampler)
, lastNotificationTime_()
, pref_(pref)
{
urgentRingBuffer_.createReadOffset(RingBufferPool::DEFAULT_ID);
JAMI_INFO("[audiolayer] AGC: %d, noiseReduce: %d, VAD: %d, echoCancel: %s, audioProcessor: %s",
pref_.isAGCEnabled(),
pref.getNoiseReduce(),
pref.getVadEnabled(),
pref.getEchoCanceller().c_str(),
pref.getAudioProcessor().c_str());
}
AudioLayer::~AudioLayer() {}
@@ -120,14 +127,27 @@ AudioLayer::recordChanged(bool started)
recordStarted_ = started;
}
// helper function
static inline bool
shouldUseAudioProcessorEchoCancel(bool hasNativeAEC, const std::string& echoCancellerPref)
{
return
// user doesn't care which, and there is no system AEC
(echoCancellerPref == "auto" && !hasNativeAEC)
// user specifically wants the audioProcessor
or (echoCancellerPref == "audioProcessor");
}
void
AudioLayer::setHasNativeAEC(bool hasEAC)
AudioLayer::setHasNativeAEC(bool hasNativeAEC)
{
JAMI_INFO("[audiolayer] setHasNativeAEC: %d", hasNativeAEC);
std::lock_guard<std::mutex> lock(audioProcessorMutex);
hasNativeAEC_ = hasEAC;
hasNativeAEC_ = hasNativeAEC;
// if we have a current audio processor, tell it to enable/disable its own AEC
if (audioProcessor) {
audioProcessor->enableEchoCancel(!hasEAC);
audioProcessor->enableEchoCancel(
shouldUseAudioProcessorEchoCancel(hasNativeAEC, pref_.getEchoCanceller()));
}
}
@@ -145,14 +165,14 @@ AudioLayer::createAudioProcessor()
AudioFormat formatForProcessor {sample_rate, nb_channels};
#if HAVE_SPEEXDSP && !HAVE_WEBRTC_AP
// we are using speex
// TODO: maybe force this to be equivalent to 20ms? as expected by speex
auto frame_size = sample_rate / 50u;
#else
// we are using either webrtc-ap or null
auto frame_size = sample_rate / 100u;
#endif
unsigned int frame_size;
if (pref_.getAudioProcessor() == "speex") {
// TODO: maybe force this to be equivalent to 20ms? as expected by speex
frame_size = sample_rate / 50u;
} else {
frame_size = sample_rate / 100u;
}
JAMI_WARN("Input {%d Hz, %d channels}",
audioInputFormat_.sample_rate,
audioInputFormat_.nb_channels);
@@ -162,23 +182,41 @@
nb_channels,
frame_size);
if (pref_.getAudioProcessor() == "webrtc") {
#if HAVE_WEBRTC_AP
JAMI_INFO("[audiolayer] using webrtc audio processor");
audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
#elif HAVE_SPEEXDSP
JAMI_INFO("[audiolayer] using speex audio processor");
audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
JAMI_WARN("[audiolayer] using WebRTCAudioProcessor");
audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
#else
JAMI_ERR("[audiolayer] audioProcessor preference is webrtc, but library not linked! "
"using NullAudioProcessor instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
#endif
} else if (pref_.getAudioProcessor() == "speex") {
#if HAVE_SPEEXDSP
JAMI_WARN("[audiolayer] using SpeexAudioProcessor");
audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
#else
JAMI_INFO("[audiolayer] using null audio processor");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
JAMI_ERR("[audiolayer] audioProcessor preference is speex, but library not linked! "
"using NullAudioProcessor instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
#endif
} else if (pref_.getAudioProcessor() == "null") {
JAMI_WARN("[audiolayer] using NullAudioProcessor");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
} else {
JAMI_ERR("[audiolayer] audioProcessor preference not recognized, using NullAudioProcessor "
"instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
}
audioProcessor->enableNoiseSuppression(pref_.getNoiseReduce());
audioProcessor->enableAutomaticGainControl(pref_.isAGCEnabled());
audioProcessor->enableNoiseSuppression(true);
// TODO: enable AGC?
audioProcessor->enableAutomaticGainControl(false);
audioProcessor->enableEchoCancel(
shouldUseAudioProcessorEchoCancel(hasNativeAEC_, pref_.getEchoCanceller()));
// can also be updated after creation via setHasNativeAEC
audioProcessor->enableEchoCancel(!hasNativeAEC_);
audioProcessor->enableVoiceActivityDetection(pref_.getVadEnabled());
}
// must acquire lock beforehand
......
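For reference, the echo-cancel policy that shouldUseAudioProcessorEchoCancel() implements can be summarized with a small self-contained test. The helper is copied here because it is file-local to audiolayer.cpp; this is an illustration, not daemon code:

#include <cassert>
#include <string>

// copy of the file-local helper from the hunk above
static bool
shouldUseAudioProcessorEchoCancel(bool hasNativeAEC, const std::string& echoCancellerPref)
{
    return (echoCancellerPref == "auto" && !hasNativeAEC)
           or (echoCancellerPref == "audioProcessor");
}

int main()
{
    assert(shouldUseAudioProcessorEchoCancel(false, "auto"));           // no system AEC: fall back to software AEC
    assert(!shouldUseAudioProcessorEchoCancel(true, "auto"));           // system AEC present: prefer it
    assert(shouldUseAudioProcessorEchoCancel(true, "audioProcessor"));  // forced software AEC
    assert(!shouldUseAudioProcessorEchoCancel(false, "system"));        // system AEC only, even when absent
}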
@@ -250,6 +250,9 @@ protected:
*/
double playbackGain_;
// audio processor preferences
const AudioPreference& pref_;
/**
* Buffers for audio processing
*/
......
@@ -35,11 +35,13 @@ AudioStream::AudioStream(pa_context* c,
unsigned samplrate,
const PaDeviceInfos& infos,
bool ec,
OnReady onReady, OnData onData)
OnReady onReady,
OnData onData)
: onReady_(std::move(onReady))
, onData_(std::move(onData))
, audiostream_(nullptr)
, mainloop_(m)
, audioType_(type)
{
pa_sample_spec sample_spec = {PA_SAMPLE_S16LE, // PA_SAMPLE_FLOAT32LE,
samplrate,
@@ -88,29 +90,37 @@
PA_STREAM_ADJUST_LATENCY | PA_STREAM_AUTO_TIMING_UPDATE | PA_STREAM_START_CORKED);
if (type == AudioDeviceType::PLAYBACK || type == AudioDeviceType::RINGTONE) {
pa_stream_set_write_callback(audiostream_, [](pa_stream* /*s*/, size_t bytes, void* userdata) {
static_cast<AudioStream*>(userdata)->onData_(bytes);
}, this);
pa_stream_set_write_callback(
audiostream_,
[](pa_stream* /*s*/, size_t bytes, void* userdata) {
static_cast<AudioStream*>(userdata)->onData_(bytes);
},
this);
pa_stream_connect_playback(audiostream_,
infos.name.empty() ? nullptr : infos.name.c_str(),
&attributes,
flags,
nullptr,
nullptr);
infos.name.empty() ? nullptr : infos.name.c_str(),
&attributes,
flags,
nullptr,
nullptr);
} else if (type == AudioDeviceType::CAPTURE) {
pa_stream_set_read_callback(audiostream_, [](pa_stream* /*s*/, size_t bytes, void* userdata) {
static_cast<AudioStream*>(userdata)->onData_(bytes);
}, this);
pa_stream_set_read_callback(
audiostream_,
[](pa_stream* /*s*/, size_t bytes, void* userdata) {
static_cast<AudioStream*>(userdata)->onData_(bytes);
},
this);
pa_stream_connect_record(audiostream_,
infos.name.empty() ? nullptr : infos.name.c_str(),
&attributes,
flags);
infos.name.empty() ? nullptr : infos.name.c_str(),
&attributes,
flags);
}
}
void disconnectStream(pa_stream* s) {
void
disconnectStream(pa_stream* s)
{
// make sure we don't get any further callback
pa_stream_set_write_callback(s, nullptr, nullptr);
pa_stream_set_read_callback(s, nullptr, nullptr);
@@ -121,7 +131,9 @@ void disconnectStream(pa_stream* s) {
pa_stream_set_started_callback(s, nullptr, nullptr);
}
void destroyStream(pa_stream* s) {
void
destroyStream(pa_stream* s)
{
pa_stream_disconnect(s);
pa_stream_set_state_callback(s, nullptr, nullptr);
disconnectStream(s);
@@ -137,6 +149,9 @@ void
AudioStream::start()
{
pa_stream_cork(audiostream_, 0, nullptr, nullptr);
// trigger echo cancel check
moved(audiostream_);
}
void
@@ -147,9 +162,8 @@ AudioStream::stop()
JAMI_DBG("Destroying stream with device %s", pa_stream_get_device_name(audiostream_));
if (pa_stream_get_state(audiostream_) == PA_STREAM_CREATING) {
disconnectStream(audiostream_);
pa_stream_set_state_callback(audiostream_, [](pa_stream* s, void*){
destroyStream(s);
}, nullptr);
pa_stream_set_state_callback(
audiostream_, [](pa_stream* s, void*) { destroyStream(s); }, nullptr);
} else {
destroyStream(audiostream_);
}
@@ -160,13 +174,53 @@ void
AudioStream::moved(pa_stream* s)
{
audiostream_ = s;
JAMI_DBG("Stream %d to %s", pa_stream_get_index(s), pa_stream_get_device_name(s));
JAMI_DBG("[audiostream] Stream moved: %d, %s",
pa_stream_get_index(s),
pa_stream_get_device_name(s));
if (audioType_ == AudioDeviceType::CAPTURE) {
// check for echo cancel
const char* name = pa_stream_get_device_name(s);
if (!name) {
JAMI_ERR("[audiostream] moved() unable to get audio stream device");
return;
}
pa_context* context = pa_stream_get_context(s);
auto* op = pa_context_get_source_info_by_name(
context,
name,
[](pa_context* /*c*/, const pa_source_info* i, int /*eol*/, void* userdata) {
AudioStream* thisPtr = (AudioStream*) userdata;
// pulse invokes this callback once per matching source and a final
// time with i == nullptr to signal the end of the list
if (!i) {
// JAMI_ERR("[audiostream] source info not found for %s", realName);
return;
}
// compare the driver name against pulse's echo-cancel module
bool usingEchoCancel = std::string_view(i->driver) == "module-echo-cancel.c";
JAMI_WARN("[audiostream] capture stream using pulse echo cancel module? %s (%s)",
usingEchoCancel ? "yes" : "no",
i->name);
if (!thisPtr) {
JAMI_ERR("[audiostream] AudioStream pointer became invalid during "
"pa_source_info_cb_t callback!");
return;
}
thisPtr->echoCancelCb(usingEchoCancel);
},
this);
pa_operation_unref(op);
}
}
void
AudioStream::stateChanged(pa_stream* s)
{
//UNUSED char str[PA_SAMPLE_SPEC_SNPRINT_MAX];
// UNUSED char str[PA_SAMPLE_SPEC_SNPRINT_MAX];
switch (pa_stream_get_state(s)) {
case PA_STREAM_CREATING:
......
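The detection in moved() relies on PulseAudio reporting module-echo-cancel.c as the driver of sources created by its echo-cancel module (loadable with, e.g., pactl load-module module-echo-cancel). A condensed standalone sketch of the same query, assuming an established pa_context on a running (locked) threaded mainloop and eliding error handling:

#include <pulse/pulseaudio.h>
#include <string_view>

// Queries a capture source by name and checks whether it is provided by
// PulseAudio's echo-cancel module, mirroring the callback in moved().
void checkSourceForEchoCancel(pa_context* context, const char* sourceName)
{
    pa_operation* op = pa_context_get_source_info_by_name(
        context,
        sourceName,
        [](pa_context*, const pa_source_info* info, int /*eol*/, void*) {
            // pulse fires the callback one final time with info == nullptr
            if (!info)
                return;
            bool usingEchoCancel = std::string_view(info->driver) == "module-echo-cancel.c";
            // a real implementation would forward this to setHasNativeAEC()
            (void) usingEchoCancel;
        },
        nullptr);
    pa_operation_unref(op);
}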
@@ -52,7 +52,8 @@ public:
unsigned,
const PaDeviceInfos&,
bool,
OnReady onReady, OnData onData);
OnReady onReady,
OnData onData);
~AudioStream();
@@ -88,6 +89,8 @@ public:
bool isReady();
void setEchoCancelCb(std::function<void(bool)>&& cb) { echoCancelCb = cb; }
private:
NON_COPYABLE(AudioStream);
@@ -109,6 +112,16 @@ private:
* A pointer to the opaque threaded main loop object
*/
pa_threaded_mainloop* mainloop_;
/**
* The type of this audio stream
*/
AudioDeviceType audioType_;
/**
* Function called whenever the stream is moved and we check for an echo canceller
*/
std::function<void(bool)> echoCancelCb;
};
} // namespace jami
@@ -432,11 +432,12 @@ PulseLayer::startStream(AudioDeviceType type)
{
waitForDevices();
PulseMainLoopLock lock(mainloop_.get());
bool ec = preference_.getEchoCanceller() == "system"
|| preference_.getEchoCanceller() == "auto";
// Create Streams
if (type == AudioDeviceType::PLAYBACK) {
if (auto dev_infos = getDeviceInfos(sinkList_, getPreferredPlaybackDevice())) {
bool ec = preference_.getEchoCanceller() == "system";
createStream(playback_,
type,
*dev_infos,
@@ -451,8 +452,12 @@
false,
std::bind(&PulseLayer::ringtoneToSpeaker, this));
} else if (type == AudioDeviceType::CAPTURE) {
if (auto dev_infos = getDeviceInfos(sourceList_, getPreferredCaptureDevice()))
createStream(record_, type, *dev_infos, true, std::bind(&PulseLayer::readFromMic, this));
if (auto dev_infos = getDeviceInfos(sourceList_, getPreferredCaptureDevice())) {
createStream(record_, type, *dev_infos, ec, std::bind(&PulseLayer::readFromMic, this));
// whenever the stream is moved, it will call this cb
record_->setEchoCancelCb([this](bool echoCancel) { setHasNativeAEC(echoCancel); });
}
}
pa_threaded_mainloop_signal(mainloop_.get(), 0);
......
@@ -117,11 +117,13 @@ static constexpr const char* RECORDPATH_KEY {"recordPath"};
static constexpr const char* ALWAYS_RECORDING_KEY {"alwaysRecording"};
static constexpr const char* VOLUMEMIC_KEY {"volumeMic"};
static constexpr const char* VOLUMESPKR_KEY {"volumeSpkr"};
static constexpr const char* ECHO_CANCELLER {"echoCanceller"};
static constexpr const char* AUDIO_PROCESSOR_KEY {"audioProcessor"};
static constexpr const char* NOISE_REDUCE_KEY {"noiseReduce"};
static constexpr const char* AGC_KEY {"automaticGainControl"};
static constexpr const char* CAPTURE_MUTED_KEY {"captureMuted"};
static constexpr const char* PLAYBACK_MUTED_KEY {"playbackMuted"};
static constexpr const char* VAD_KEY {"voiceActivityDetection"};
static constexpr const char* ECHO_CANCEL_KEY {"echoCancel"};
#ifdef ENABLE_VIDEO
// video preferences
@@ -284,9 +286,11 @@ AudioPreference::AudioPreference()
, alwaysRecording_(false)
, volumemic_(1.0)
, volumespkr_(1.0)
, echoCanceller_("system")
, audioProcessor_("webrtc")
, denoise_(false)
, agcEnabled_(false)
, vadEnabled_(true)
, echoCanceller_("auto")
, captureMuted_(false)
, playbackMuted_(false)
{}
@@ -424,9 +428,7 @@ AudioPreference::serialize(YAML::Emitter& out) const
// common options
out << YAML::Key << ALWAYS_RECORDING_KEY << YAML::Value << alwaysRecording_;
out << YAML::Key << AUDIO_API_KEY << YAML::Value << audioApi_;
out << YAML::Key << AGC_KEY << YAML::Value << agcEnabled_;
out << YAML::Key << CAPTURE_MUTED_KEY << YAML::Value << captureMuted_;
out << YAML::Key << NOISE_REDUCE_KEY << YAML::Value << denoise_;
out << YAML::Key << PLAYBACK_MUTED_KEY << YAML::Value << playbackMuted_;
// pulse submap
@@ -447,7 +449,13 @@ AudioPreference::serialize(YAML::Emitter& out) const
out << YAML::Key << RECORDPATH_KEY << YAML::Value << recordpath_;
out << YAML::Key << VOLUMEMIC_KEY << YAML::Value << volumemic_;
out << YAML::Key << VOLUMESPKR_KEY << YAML::Value << volumespkr_;
out << YAML::Key << ECHO_CANCELLER << YAML::Value << echoCanceller_;
// audio processor options, not in a submap
out << YAML::Key << AUDIO_PROCESSOR_KEY << YAML::Value << audioProcessor_;
out << YAML::Key << AGC_KEY << YAML::Value << agcEnabled_;
out << YAML::Key << VAD_KEY << YAML::Value << vadEnabled_;
out << YAML::Key << NOISE_REDUCE_KEY << YAML::Value << denoise_;
out << YAML::Key << ECHO_CANCEL_KEY << YAML::Value << echoCanceller_;
out << YAML::EndMap;
}
@@ -502,7 +510,9 @@ AudioPreference::unserialize(const YAML::Node& in)
parseValue(node, RECORDPATH_KEY, recordpath_);
parseValue(node, VOLUMEMIC_KEY, volumemic_);
parseValue(node, VOLUMESPKR_KEY, volumespkr_);
parseValue(node, ECHO_CANCELLER, echoCanceller_);
parseValue(node, AUDIO_PROCESSOR_KEY, audioProcessor_);
parseValue(node, VAD_KEY, vadEnabled_);
parseValue(node, ECHO_CANCEL_KEY, echoCanceller_);
}
#ifdef ENABLE_VIDEO
......
@@ -236,9 +236,17 @@ public:
void setPlaybackMuted(bool muted) { playbackMuted_ = muted; }
const std::string& getAudioProcessor() const { return audioProcessor_; }
void setAudioProcessor(const std::string& ap) { audioProcessor_ = ap; }
bool getVadEnabled() const { return vadEnabled_; }
void setVad(bool enable) { vadEnabled_ = enable; }
const std::string& getEchoCanceller() const { return echoCanceller_; }
void setEchoCanceller(const std::string& ec) { echoCanceller_ = ec; }
void setEchoCancel(std::string& canceller) { echoCanceller_ = canceller; }
private:
std::string audioApi_;
@@ -265,10 +273,14 @@ private:
bool alwaysRecording_;
double volumemic_;
double volumespkr_;
std::string echoCanceller_;
// audio processor preferences
std::string audioProcessor_;
bool denoise_;
bool agcEnabled_;
bool vadEnabled_;
std::string echoCanceller_;
bool captureMuted_;
bool playbackMuted_;
constexpr static const char* const CONFIG_LABEL = "audio";
......