Commit d10fa634 authored and committed by Adrien Béraud

speex: use AVFrame instead of AudioBuffer

Change-Id: I680b550f6412925e0fac0e732f40e7e319f01cc8
parent d09a15e1
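In short: rather than hand-deinterleaving into an AudioBuffer, the processed frame is now converted between interleaved AV_SAMPLE_FMT_S16 and planar AV_SAMPLE_FMT_S16P AVFrames using Jami's Resampler, a wrapper around libswresample; in the planar layout each channel occupies its own contiguous data[] plane. A minimal standalone sketch of that deinterleave step with raw libswresample (the helper name and error handling are illustrative, not Jami code):

extern "C" {
#include <libavutil/frame.h>
#include <libswresample/swresample.h>
}

// Sketch: convert an interleaved S16 frame to planar S16P so each channel
// becomes one contiguous int16_t plane in out->data[].
static AVFrame*
deinterleaveS16(const AVFrame* in)
{
    AVFrame* out = av_frame_alloc();
    if (!out)
        return nullptr;
    out->format = AV_SAMPLE_FMT_S16P;         // planar variant of the input format
    out->sample_rate = in->sample_rate;       // rate and channels stay the same
    out->channel_layout = in->channel_layout;

    SwrContext* swr = swr_alloc();
    // swr_convert_frame() initializes the context from the two frames'
    // parameters and allocates the output buffers if needed
    int err = swr_convert_frame(swr, out, in);
    swr_free(&swr);
    if (err < 0) {
        av_frame_free(&out);
        return nullptr;
    }
    return out;
}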
@@ -34,19 +34,25 @@ extern "C" {
 namespace jami {

-inline AudioFormat
-audioFormatToSampleFormat(AudioFormat format)
-{
-    return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_S16};
-}
-
 SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
-    : AudioProcessor(format, frameSize)
+    : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_S16), frameSize)
     , echoState(speex_echo_state_init_mc((int) frameSize,
                                          (int) frameSize * 16,
-                                         (int) format.nb_channels,
-                                         (int) format.nb_channels),
+                                         (int) format_.nb_channels,
+                                         (int) format_.nb_channels),
                 &speex_echo_state_destroy)
-    , iProcBuffer(frameSize_, format)
+    , procBuffer(std::make_unique<AudioFrame>(format.withSampleFormat(AV_SAMPLE_FMT_S16P), frameSize_))
 {
     JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d",
              frameSize,
              frameDurationMs_,
-             format.nb_channels);
+             format_.nb_channels);

     // set up speex echo state
     speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
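For context on the echo-canceller arguments kept above: speex_echo_state_init_mc() takes the frame size in samples, the filter length (how long an echo tail the canceller can model; frameSize * 16 means a 16-frame tail), and the microphone and speaker channel counts. A standalone sketch against speexdsp, with illustrative values:

#include <speex/speex_echo.h>

// Sketch of a multichannel echo-canceller lifecycle; the numbers are
// illustrative, not Jami's actual configuration.
void echoCancelSketch()
{
    int frameSize = 480; // e.g. 10 ms at 48 kHz
    int channels = 2;
    SpeexEchoState* echo = speex_echo_state_init_mc(frameSize,
                                                    frameSize * 16, // filter length: 16-frame tail
                                                    channels,       // microphone channels
                                                    channels);      // speaker channels
    int rate = 48000;
    speex_echo_ctl(echo, SPEEX_ECHO_SET_SAMPLING_RATE, &rate);
    // per frame: speex_echo_cancellation(echo, recorded, played, out) on interleaved S16
    speex_echo_state_destroy(echo);
}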
@@ -66,10 +72,10 @@ SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
     // set up speex preprocess states, one for each channel
     // note that they are not enabled here, but rather in the enable* functions
-    for (unsigned int i = 0; i < format.nb_channels; i++) {
+    for (unsigned int i = 0; i < format_.nb_channels; i++) {
         auto channelPreprocessorState
             = SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize,
-                                                                  (int) format.sample_rate),
+                                                                  (int) format_.sample_rate),
                                       &speex_preprocess_state_destroy);

         // set max noise suppression level
@@ -184,11 +190,11 @@ SpeexAudioProcessor::getProcessed()
         return {};
     }

-    auto processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
+    std::shared_ptr<AudioFrame> processed;
     if (shouldAEC) {
         // we want to echo cancel
         // multichannel, output into processed
+        processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
         speex_echo_cancellation(echoState.get(),
                                 (int16_t*) record->pointer()->data[0],
                                 (int16_t*) playback->pointer()->data[0],
@@ -198,16 +204,7 @@
         processed = record;
     }

-    // deinterleave processed into channels
-    std::vector<int16_t*> procData {format_.nb_channels};
-    iProcBuffer.deinterleave((const AudioSample*) processed->pointer()->data[0],
-                             frameSize_,
-                             format_.nb_channels);
-
-    // point procData to correct channels
-    for (unsigned int channel = 0; channel < format_.nb_channels; channel++) {
-        procData[channel] = iProcBuffer.getChannel(channel)->data();
-    }
+    deinterleaveResampler.resample(processed->pointer(), procBuffer->pointer());

     // overall voice activity
     bool overallVad = false;
@@ -218,7 +215,7 @@
     int channel = 0;
     for (auto& channelPreprocessorState : preprocessorStates) {
         // preprocesses in place, returns voice activity boolean
-        channelVad = speex_preprocess_run(channelPreprocessorState.get(), procData[channel]);
+        channelVad = speex_preprocess_run(channelPreprocessorState.get(), (int16_t*) procBuffer->pointer()->data[channel]);
         // boolean OR
         overallVad |= channelVad;
@@ -226,12 +223,10 @@
         channel += 1;
     }

     // reinterleave into processed
-    iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);
+    interleaveResampler.resample(procBuffer->pointer(), processed->pointer());

     // add stabilized voice activity to the AudioFrame
     processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);

     return processed;
 }
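Since procBuffer is planar, each channel is one contiguous run of int16_t samples in AVFrame::data[channel], so speex_preprocess_run() can be pointed straight at a plane; this is what makes the old procData pointer table unnecessary. A sketch of the per-channel loop under the same assumptions (function and parameter names are illustrative):

extern "C" {
#include <libavutil/frame.h>
}
#include <speex/speex_preprocess.h>
#include <cstdint>
#include <vector>

// Sketch: run one preprocessor state per channel over the planes of a
// planar S16 frame, OR-ing the per-channel voice-activity results.
bool
preprocessPlanar(AVFrame* frame, std::vector<SpeexPreprocessState*>& states)
{
    bool anyVoice = false;
    for (size_t channel = 0; channel < states.size(); ++channel) {
        // data[channel] holds frame->nb_samples contiguous int16_t samples
        int vad = speex_preprocess_run(states[channel],
                                       (int16_t*) frame->data[channel]);
        anyVoice |= (vad != 0);
    }
    return anyVoice;
}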
......
@@ -21,7 +21,6 @@
 #pragma once

 #include "audio_processor.h"
-#include "media/audio/audiobuffer.h"

 // typedef speex C structs
 extern "C" {
@@ -57,7 +56,9 @@ private:
     // one for each channel
     std::vector<SpeexPreprocessStatePtr> preprocessorStates;

-    AudioBuffer iProcBuffer;
+    std::unique_ptr<AudioFrame> procBuffer {};
+    Resampler deinterleaveResampler;
+    Resampler interleaveResampler;

     // if we should do echo cancellation
     bool shouldAEC {false};
......
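A note on the pair of Resampler members: a libswresample context is configured for one specific input/output conversion, so keeping a dedicated instance per direction plausibly avoids reconfiguring a single context twice per frame. A hypothetical round-trip helper mirroring getProcessed() (the include path is assumed; the resample(input, output) call shape matches the diff):

#include "resampler.h" // Jami's wrapper around SwrContext; path assumed

// Hypothetical round trip: deinterleave, process, reinterleave, with one
// Resampler per direction as in SpeexAudioProcessor.
void
roundTrip(jami::Resampler& deinterleaver,
          jami::Resampler& interleaver,
          AVFrame* interleaved, // AV_SAMPLE_FMT_S16
          AVFrame* planar)      // AV_SAMPLE_FMT_S16P
{
    deinterleaver.resample(interleaved, planar); // S16  -> S16P
    // ... per-channel DSP on planar->data[channel] ...
    interleaver.resample(planar, interleaved);   // S16P -> S16
}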
@@ -23,12 +23,6 @@
 namespace jami {

-inline AudioFormat
-audioFormatToFloatPlanar(AudioFormat format)
-{
-    return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_FLTP};
-}
-
 inline size_t
 webrtcFrameSize(AudioFormat format)
 {
@@ -38,19 +32,19 @@ webrtcFrameSize(AudioFormat format)
 constexpr int webrtcNoError = webrtc::AudioProcessing::kNoError;

 WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned /* frameSize */)
-    : AudioProcessor(audioFormatToFloatPlanar(format), webrtcFrameSize(format))
+    : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_FLTP), webrtcFrameSize(format))
 {
     JAMI_LOG("[webrtc-ap] WebRTCAudioProcessor, frame size = {:d} (={:d} ms), channels = {:d}",
              frameSize_,
              frameDurationMs_,
-             format.nb_channels);
+             format_.nb_channels);

     webrtc::Config config;
     config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
     config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
     apm.reset(webrtc::AudioProcessing::Create(config));

-    webrtc::StreamConfig streamConfig((int) format.sample_rate, (int) format.nb_channels);
+    webrtc::StreamConfig streamConfig((int) format_.sample_rate, (int) format_.nb_channels);
     webrtc::ProcessingConfig pconfig = {
         streamConfig, /* input stream */
         streamConfig, /* output stream */
......
@@ -59,6 +59,12 @@ struct AudioFormat
         return fmt::format("{{{}, {} channels, {}Hz}}", av_get_sample_fmt_name(sampleFormat), nb_channels, sample_rate);
     }

+    inline AudioFormat withSampleFormat(AVSampleFormat format)
+    {
+        return {sample_rate, nb_channels, format};
+    }
+
     /**
      * Returns bytes necessary to hold one frame of audio data.
      */
......
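The new helper both processors use above: it swaps the sample layout while preserving rate and channel count. A brief usage sketch with illustrative values:

// Usage sketch for AudioFormat::withSampleFormat(); values illustrative.
AudioFormat capture {48000, 2, AV_SAMPLE_FMT_S16};                 // 48 kHz stereo, interleaved
AudioFormat planar = capture.withSampleFormat(AV_SAMPLE_FMT_S16P); // same rate/channels, planar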