Skip to content
Snippets Groups Projects
Commit d10fa634, authored by Adrien Béraud, committed by Adrien Béraud
Browse files

speex: use AVFrame instead of AudioBuffer

Change-Id: I680b550f6412925e0fac0e732f40e7e319f01cc8
parent d09a15e1
Branches
Tags
No related merge requests found
...@@ -34,19 +34,25 @@ extern "C" { ...@@ -34,19 +34,25 @@ extern "C" {
namespace jami { namespace jami {
// Derive a format identical to `format` (same rate, same channel count)
// but carrying 16-bit signed integer samples, as required by the Speex DSP.
inline AudioFormat
audioFormatToSampleFormat(AudioFormat format)
{
    AudioFormat s16Format {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_S16};
    return s16Format;
}
SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize) SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
: AudioProcessor(format, frameSize) : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_S16), frameSize)
, echoState(speex_echo_state_init_mc((int) frameSize, , echoState(speex_echo_state_init_mc((int) frameSize,
(int) frameSize * 16, (int) frameSize * 16,
(int) format.nb_channels, (int) format_.nb_channels,
(int) format.nb_channels), (int) format_.nb_channels),
&speex_echo_state_destroy) &speex_echo_state_destroy)
, iProcBuffer(frameSize_, format) , procBuffer(std::make_unique<AudioFrame>(format.withSampleFormat(AV_SAMPLE_FMT_S16P), frameSize_))
{ {
JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d", JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d",
frameSize, frameSize,
frameDurationMs_, frameDurationMs_,
format.nb_channels); format_.nb_channels);
// set up speex echo state // set up speex echo state
speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate); speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
...@@ -66,10 +72,10 @@ SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize) ...@@ -66,10 +72,10 @@ SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
// set up speex preprocess states, one for each channel // set up speex preprocess states, one for each channel
// note that they are not enabled here, but rather in the enable* functions // note that they are not enabled here, but rather in the enable* functions
for (unsigned int i = 0; i < format.nb_channels; i++) { for (unsigned int i = 0; i < format_.nb_channels; i++) {
auto channelPreprocessorState auto channelPreprocessorState
= SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize, = SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize,
(int) format.sample_rate), (int) format_.sample_rate),
&speex_preprocess_state_destroy); &speex_preprocess_state_destroy);
// set max noise suppression level // set max noise suppression level
...@@ -184,11 +190,11 @@ SpeexAudioProcessor::getProcessed() ...@@ -184,11 +190,11 @@ SpeexAudioProcessor::getProcessed()
return {}; return {};
} }
auto processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize()); std::shared_ptr<AudioFrame> processed;
if (shouldAEC) { if (shouldAEC) {
// we want to echo cancel // we want to echo cancel
// multichannel, output into processed // multichannel, output into processed
processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
speex_echo_cancellation(echoState.get(), speex_echo_cancellation(echoState.get(),
(int16_t*) record->pointer()->data[0], (int16_t*) record->pointer()->data[0],
(int16_t*) playback->pointer()->data[0], (int16_t*) playback->pointer()->data[0],
...@@ -198,16 +204,7 @@ SpeexAudioProcessor::getProcessed() ...@@ -198,16 +204,7 @@ SpeexAudioProcessor::getProcessed()
processed = record; processed = record;
} }
// deinterleave processed into channels deinterleaveResampler.resample(processed->pointer(), procBuffer->pointer());
std::vector<int16_t*> procData {format_.nb_channels};
iProcBuffer.deinterleave((const AudioSample*) processed->pointer()->data[0],
frameSize_,
format_.nb_channels);
// point procData to correct channels
for (unsigned int channel = 0; channel < format_.nb_channels; channel++) {
procData[channel] = iProcBuffer.getChannel(channel)->data();
}
// overall voice activity // overall voice activity
bool overallVad = false; bool overallVad = false;
...@@ -218,7 +215,7 @@ SpeexAudioProcessor::getProcessed() ...@@ -218,7 +215,7 @@ SpeexAudioProcessor::getProcessed()
int channel = 0; int channel = 0;
for (auto& channelPreprocessorState : preprocessorStates) { for (auto& channelPreprocessorState : preprocessorStates) {
// preprocesses in place, returns voice activity boolean // preprocesses in place, returns voice activity boolean
channelVad = speex_preprocess_run(channelPreprocessorState.get(), procData[channel]); channelVad = speex_preprocess_run(channelPreprocessorState.get(), (int16_t*)procBuffer->pointer()->data[channel]);
// boolean OR // boolean OR
overallVad |= channelVad; overallVad |= channelVad;
...@@ -226,12 +223,10 @@ SpeexAudioProcessor::getProcessed() ...@@ -226,12 +223,10 @@ SpeexAudioProcessor::getProcessed()
channel += 1; channel += 1;
} }
// reinterleave into processed interleaveResampler.resample(procBuffer->pointer(), processed->pointer());
iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);
// add stabilized voice activity to the AudioFrame // add stabilized voice activity to the AudioFrame
processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad); processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);
return processed; return processed;
} }
......
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#pragma once #pragma once
#include "audio_processor.h" #include "audio_processor.h"
#include "media/audio/audiobuffer.h"
// typedef speex C structs // typedef speex C structs
extern "C" { extern "C" {
...@@ -57,7 +56,9 @@ private: ...@@ -57,7 +56,9 @@ private:
// one for each channel // one for each channel
std::vector<SpeexPreprocessStatePtr> preprocessorStates; std::vector<SpeexPreprocessStatePtr> preprocessorStates;
AudioBuffer iProcBuffer; std::unique_ptr<AudioFrame> procBuffer {};
Resampler deinterleaveResampler;
Resampler interleaveResampler;
// if we should do echo cancellation // if we should do echo cancellation
bool shouldAEC {false}; bool shouldAEC {false};
......
...@@ -23,12 +23,6 @@ ...@@ -23,12 +23,6 @@
namespace jami { namespace jami {
// Derive a format identical to `format` (same rate, same channel count)
// but carrying planar 32-bit float samples, as required by WebRTC APM.
inline AudioFormat
audioFormatToFloatPlanar(AudioFormat format)
{
    AudioFormat fltpFormat {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_FLTP};
    return fltpFormat;
}
inline size_t inline size_t
webrtcFrameSize(AudioFormat format) webrtcFrameSize(AudioFormat format)
{ {
...@@ -38,19 +32,19 @@ webrtcFrameSize(AudioFormat format) ...@@ -38,19 +32,19 @@ webrtcFrameSize(AudioFormat format)
constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError; constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError;
WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned /* frameSize */) WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned /* frameSize */)
: AudioProcessor(audioFormatToFloatPlanar(format), webrtcFrameSize(format)) : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_FLTP), webrtcFrameSize(format))
{ {
JAMI_LOG("[webrtc-ap] WebRTCAudioProcessor, frame size = {:d} (={:d} ms), channels = {:d}", JAMI_LOG("[webrtc-ap] WebRTCAudioProcessor, frame size = {:d} (={:d} ms), channels = {:d}",
frameSize_, frameSize_,
frameDurationMs_, frameDurationMs_,
format.nb_channels); format_.nb_channels);
webrtc::Config config; webrtc::Config config;
config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true)); config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true)); config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
apm.reset(webrtc::AudioProcessing::Create(config)); apm.reset(webrtc::AudioProcessing::Create(config));
webrtc::StreamConfig streamConfig((int) format.sample_rate, (int) format.nb_channels); webrtc::StreamConfig streamConfig((int) format_.sample_rate, (int) format_.nb_channels);
webrtc::ProcessingConfig pconfig = { webrtc::ProcessingConfig pconfig = {
streamConfig, /* input stream */ streamConfig, /* input stream */
streamConfig, /* output stream */ streamConfig, /* output stream */
......
...@@ -59,6 +59,12 @@ struct AudioFormat ...@@ -59,6 +59,12 @@ struct AudioFormat
return fmt::format("{{{}, {} channels, {}Hz}}", av_get_sample_fmt_name(sampleFormat), nb_channels, sample_rate); return fmt::format("{{{}, {} channels, {}Hz}}", av_get_sample_fmt_name(sampleFormat), nb_channels, sample_rate);
} }
/**
 * Return a copy of this format with only the sample format replaced;
 * sample rate and channel count are preserved.
 *
 * @param format the AVSampleFormat the returned copy should carry.
 * @return a new AudioFormat {sample_rate, nb_channels, format}.
 */
inline AudioFormat withSampleFormat(AVSampleFormat format) const
{
    // const: this is a pure accessor, so it must be callable on a
    // const AudioFormat (e.g. a `const AudioFormat&` parameter).
    return {sample_rate, nb_channels, format};
}
/** /**
* Returns bytes necessary to hold one frame of audio data. * Returns bytes necessary to hold one frame of audio data.
*/ */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment