diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp index c51bdf4b93b2901b71dd4f6310398fc82fa77890..8082949c344bfaea2255eee2e0d3ec2417d1d15c 100644 --- a/src/media/audio/audio_rtp_session.cpp +++ b/src/media/audio/audio_rtp_session.cpp @@ -175,6 +175,7 @@ AudioSender::process() Smartools::getInstance().setLocalAudioCodec(audioEncoder_->getEncoderName()); + AudioBuffer buffer; if (mainBuffFormat.sample_rate != accountAudioCodec->audioformat.sample_rate) { if (not resampler_) { RING_DBG("Creating audio resampler"); @@ -183,12 +184,16 @@ AudioSender::process() resampledData_.setFormat(accountAudioCodec->audioformat); resampledData_.resize(samplesToGet); resampler_->resample(micData_, resampledData_); - if (audioEncoder_->encode_audio(resampledData_) < 0) - RING_ERR("encoding failed"); + buffer = resampledData_; } else { - if (audioEncoder_->encode_audio(micData_) < 0) - RING_ERR("encoding failed"); + buffer = micData_; } + + if (muteState_) // audio is muted, set samples to 0 + buffer.reset(); + + if (audioEncoder_->encodeAudio(buffer.toAVFrame()) < 0) + RING_ERR("encoding failed"); } void AudioSender::setMuted(bool isMuted) diff --git a/src/media/audio/audiobuffer.cpp b/src/media/audio/audiobuffer.cpp index 7eaa832b796741057aa3e3d6ce847f3a6ef17110..9a8ce6d3ffa00149e11cf438db138088870d4f62 100644 --- a/src/media/audio/audiobuffer.cpp +++ b/src/media/audio/audiobuffer.cpp @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ +#include "libav_deps.h" #include "audiobuffer.h" #include "logger.h" #include <string.h> @@ -289,4 +290,46 @@ size_t AudioBuffer::copy(AudioSample* in, size_t sample_num, size_t pos_out /* = return sample_num; } +AVFrame* +AudioBuffer::toAVFrame() const +{ + const constexpr AVSampleFormat fmt = AV_SAMPLE_FMT_S16; + + // NOTE: don't use std::vector, avcodec_fill_audio_frame makes the frame point to our array + // instead of copying it, resulting in a heap use after free once we return + AudioSample** samples; + int err = av_samples_alloc_array_and_samples(reinterpret_cast<uint8_t***>(&samples), + nullptr, channels(), frames(), fmt, 0); + if (err < 0) { + RING_ERR() << "Failed to allocate audio buffer"; + return nullptr; + } + + interleave(*samples); + + AVFrame* frame = av_frame_alloc(); + if (!frame) { + RING_ERR() << "Failed to allocate frame"; + av_freep(samples); + return nullptr; + } + + frame->nb_samples = frames(); + frame->format = fmt; + frame->channel_layout = av_get_default_channel_layout(channels()); + frame->channels = channels(); + frame->sample_rate = getSampleRate(); + + const auto bufSize = av_samples_get_buffer_size(nullptr, + frame->channels, frame->nb_samples, fmt, 0); + if (avcodec_fill_audio_frame(frame, frame->channels, fmt, + reinterpret_cast<uint8_t*>(*samples), bufSize, 0) < 0) { + RING_ERR() << "Failed to fill audio frame"; + av_frame_free(&frame); + return nullptr; + } + + return frame; +} + } // namespace ring diff --git a/src/media/audio/audiobuffer.h b/src/media/audio/audiobuffer.h index bbfbbe764622377d41e7f237ae3d74a52fd7b24c..9606b34925e714f8ea267f53f3f9e59e0854bd42 100644 --- a/src/media/audio/audiobuffer.h +++ b/src/media/audio/audiobuffer.h @@ -34,6 +34,8 @@ #include <ciso646> // fix windows compiler bug +struct AVFrame; + namespace ring { /** @@ -343,6 +345,8 @@ class AudioBuffer { */ size_t copy(AudioSample* in, size_t sample_num, size_t pos_out = 0); + AVFrame* toAVFrame() const; + private: int sampleRate_; diff --git a/src/media/media_encoder.cpp b/src/media/media_encoder.cpp index d26ea2e47d0c8525ceb23cc0ec0d93745581b6db..f5f3d0aa76f0fe69aa0273771d44e2214248c1f0 100644 --- a/src/media/media_encoder.cpp +++ b/src/media/media_encoder.cpp @@ -378,91 +378,30 @@ MediaEncoder::encode(VideoFrame& input, bool is_keyframe, } #endif // RING_VIDEO -int MediaEncoder::encode_audio(const AudioBuffer &buffer) +int MediaEncoder::encodeAudio(AVFrame* frame) { - const int needed_bytes = av_samples_get_buffer_size(nullptr, buffer.channels(), - buffer.frames(), - AV_SAMPLE_FMT_S16, 0); - if (needed_bytes < 0) { - RING_ERR("Couldn't calculate buffer size"); - return -1; - } - - std::vector<AudioSample> samples (needed_bytes / sizeof(AudioSample)); - AudioSample* sample_data = samples.data(); - - AudioSample *offset_ptr = sample_data; - int nb_frames = buffer.frames(); - - AVCodecContext* encoderCtx = encoders_[currentStreamIdx_]; - - if (not is_muted) { - //only fill buffer with samples if not muted - buffer.interleave(sample_data); - } else { - //otherwise filll buffer with zero - buffer.fillWithZero(sample_data); - } - const auto layout = buffer.channels() == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; - const auto sample_rate = buffer.getSampleRate(); - - while (nb_frames > 0) { - AVFrame* frame = av_frame_alloc(); - if (!frame) - return -1; - - if (encoderCtx->frame_size) - frame->nb_samples = std::min<int>(nb_frames, - encoderCtx->frame_size); - else - frame->nb_samples = nb_frames; - - frame->format = AV_SAMPLE_FMT_S16; - frame->channel_layout = layout; - frame->channels = buffer.channels(); - frame->sample_rate = sample_rate; - - frame->pts = getNextTimestamp(sent_samples, encoderCtx->sample_rate, encoderCtx->time_base); - sent_samples += frame->nb_samples; - - const auto buffer_size = \ - av_samples_get_buffer_size(nullptr, buffer.channels(), - frame->nb_samples, AV_SAMPLE_FMT_S16, 0); - - int err = avcodec_fill_audio_frame(frame, buffer.channels(), - AV_SAMPLE_FMT_S16, - reinterpret_cast<const uint8_t *>(offset_ptr), - buffer_size, 0); - if (err < 0) { - RING_ERR() << "Failed to fill audio frame of size" << buffer_size << " with " - << frame->nb_samples << " samples: " << libav_utils::getError(err); - av_frame_free(&frame); - return -1; - } - - nb_frames -= frame->nb_samples; - offset_ptr += frame->nb_samples * buffer.channels(); - - if (auto rec = recorder_.lock()) { - if (!recordingStarted_) { - auto ms = MediaStream("", encoders_[currentStreamIdx_], frame->pts); - if (rec->addStream(false, false, ms) >= 0) { - recordingStarted_ = true; - } else { - recorder_ = std::weak_ptr<MediaRecorder>(); - } + auto enc = encoders_[currentStreamIdx_]; + frame->pts = getNextTimestamp(sent_samples, enc->sample_rate, enc->time_base); + sent_samples += frame->nb_samples; + if (auto rec = recorder_.lock()) { + if (!recordingStarted_) { + auto ms = MediaStream("", encoders_[currentStreamIdx_], frame->pts); + if (rec->addStream(false, false, ms) >= 0) { + recordingStarted_ = true; + } else { + recorder_ = std::weak_ptr<MediaRecorder>(); } - if (recordingStarted_) - rec->recordData(frame, false, false); - } else { - recordingStarted_ = false; - recorder_ = std::weak_ptr<MediaRecorder>(); } - - encode(frame, currentStreamIdx_); - av_frame_free(&frame); + if (recordingStarted_) + rec->recordData(frame, false, false); + } else { + recordingStarted_ = false; + recorder_ = std::weak_ptr<MediaRecorder>(); } + encode(frame, currentStreamIdx_); + av_frame_free(&frame); + return 0; } diff --git a/src/media/media_encoder.h b/src/media/media_encoder.h index 1ccc7840708b097aac8121105152bd6455f558a0..9ec7437b8d55084f8eab9b5d61d29d6962325bac 100644 --- a/src/media/media_encoder.h +++ b/src/media/media_encoder.h @@ -75,7 +75,7 @@ public: int encode(VideoFrame &input, bool is_keyframe, int64_t frame_number); #endif // RING_VIDEO - int encode_audio(const AudioBuffer &input); + int encodeAudio(AVFrame* frame); // frame should be ready to be sent to the encoder at this point int encode(AVFrame* frame, int streamIdx);