diff --git a/daemon/configure.ac b/daemon/configure.ac index 6f6ef4a9a7e697d25f75db38114fc348aefc7aee..1214c8dc84b4289a4148c6c979f845d13031411e 100644 --- a/daemon/configure.ac +++ b/daemon/configure.ac @@ -366,6 +366,7 @@ AS_IF([test "x$enable_video" != "xno"], ], [ AM_CONDITIONAL(SFL_VIDEO, false) + AC_DEFINE_UNQUOTED([USE_CCRTP], 1, [Use ccrtp instead of libavformat]) ]); LIBCCRTP_MIN_VERSION=1.3.0 diff --git a/daemon/src/audio/audiortp/Makefile.am b/daemon/src/audio/audiortp/Makefile.am index 642f365a1ad8c65078b48cfd7e4bff82ec8d047f..360b36dc01e8eb17a4c8414f6cb587d4901e998f 100644 --- a/daemon/src/audio/audiortp/Makefile.am +++ b/daemon/src/audio/audiortp/Makefile.am @@ -8,6 +8,17 @@ endif libaudiortp_la_SOURCES = \ $(SFL_ZRTP_SRC) \ + base64.c base64.h + +if SFL_VIDEO +libaudiortp_la_SOURCES += \ + avformat_rtp_session.cpp \ + avformat_rtp_session.h + +AM_CXXFLAGS = @LIBAVFORMAT_CFLAGS@ + +else +libaudiortp_la_SOURCES += \ audio_rtp_session.cpp \ audio_symmetric_rtp_session.cpp \ audio_rtp_stream.cpp \ @@ -19,5 +30,8 @@ libaudiortp_la_SOURCES = \ audio_rtp_stream.h \ audio_rtp_factory.h \ audio_symmetric_rtp_session.h \ - audio_srtp_session.h \ - base64.c base64.h + audio_srtp_session.h +endif + +# FIXME +AM_CPPFLAGS += -I$(top_srcdir)/src diff --git a/daemon/src/audio/audiortp/TODO b/daemon/src/audio/audiortp/TODO new file mode 100644 index 0000000000000000000000000000000000000000..06bf60ae98d7519436f509de8c85c2292fe632da --- /dev/null +++ b/daemon/src/audio/audiortp/TODO @@ -0,0 +1,32 @@ +Tested and Working +------- +* Opus +* PCMU +* PCMA +* speex narrowband +* G722 +* Mono and stereo input + +Needs to be implemented +----------------------- +* SRTP +* DTMF over RTP +* RTP + STUN +* Rename Video{Encoder,Decoder} to AV{Encoder,Decoder} +* Drop CCRTP and its dependencies and its dependents for real + +Needs to be fixed: +------------------ +* speex wideband fails with "Invalid data found when processing input" +* speex ultraband fails with "Invalid data found 
when processing input" + +Might work if libavcodec is built with support for it (untested): +----------------------------------------------------------------- +* ILBC +* g726 + +Can't work: +----------- +* gsm +* g729 +* libavformat is MISSING RTP mux/demux for gsm and g729, see is_supported(enum AVCodecID id) in libavformat/rtpenc.c diff --git a/daemon/src/audio/audiortp/avformat_rtp_session.cpp b/daemon/src/audio/audiortp/avformat_rtp_session.cpp new file mode 100644 index 0000000000000000000000000000000000000000..78c11db8ff4b740f3cd622c7ab2a6d245c8c3159 --- /dev/null +++ b/daemon/src/audio/audiortp/avformat_rtp_session.cpp @@ -0,0 +1,489 @@ +/* + * Copyright (C) 2014 Savoir-Faire Linux Inc. + * Author: Tristan Matthews <tristan.matthews@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Additional permission under GNU GPL version 3 section 7: + * + * If you modify this program, or any covered work, by linking or + * combining it with the OpenSSL project's OpenSSL library (or a + * modified version of that library), containing parts covered by the + * terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc. + * grants you additional permission to convey the resulting work. 
+ * Corresponding Source for a non-source form of such a combination + * shall include the source code for the parts of OpenSSL used as well + * as that of the covered work. + */ +#include "avformat_rtp_session.h" + +#include "logger.h" +#include "noncopyable.h" +#include "sip/sdp.h" +#include "video/socket_pair.h" +#include "video/video_base.h" +#include "video/video_encoder.h" +#include "video/video_decoder.h" +#include "video/libav_deps.h" +#include "audio/audiobuffer.h" +#include "audio/ringbufferpool.h" +#include "audio/resampler.h" +#include "manager.h" +#include <sstream> + +namespace sfl { +using sfl_video::SocketPair; +using sfl_video::VideoEncoder; +using sfl_video::VideoIOHandle; +using sfl_video::VideoEncoderException; + +class AudioSender { + public: + AudioSender(const std::string& id, + std::map<std::string, std::string> txArgs, + sfl_video::SocketPair& socketPair); + ~AudioSender(); + + private: + NON_COPYABLE(AudioSender); + + bool waitForDataEncode(const std::chrono::milliseconds& max_wait) const; + bool setup(sfl_video::SocketPair& socketPair); + + std::string id_; + std::map<std::string, std::string> args_; + const AudioFormat format_; + std::unique_ptr<sfl_video::VideoEncoder> audioEncoder_; + std::unique_ptr<sfl_video::VideoIOHandle> muxContext_; + std::unique_ptr<sfl::Resampler> resampler_; + const double secondsPerPacket_ {0.02}; // 20 ms + + ThreadLoop loop_; + void process(); + void cleanup(); +}; + +AudioSender::AudioSender(const std::string& id, std::map<std::string, std::string> txArgs, SocketPair& socketPair) : + id_(id), + args_(txArgs), + format_(std::atoi(args_["sample_rate"].c_str()), + std::atoi(args_["channels"].c_str())), + loop_([&] { return setup(socketPair); }, + std::bind(&AudioSender::process, this), + std::bind(&AudioSender::cleanup, this)) +{ + std::ostringstream os; + os << secondsPerPacket_ * format_.sample_rate; + args_["frame_size"] = os.str(); + loop_.start(); +} + +AudioSender::~AudioSender() +{ + loop_.join(); +} + 
+bool
+AudioSender::setup(SocketPair& socketPair)
+{
+    auto enc_name = args_["codec"].c_str();
+    auto dest = args_["destination"].c_str();
+
+    audioEncoder_.reset(new VideoEncoder);
+    muxContext_.reset(socketPair.createIOContext());
+
+    try {
+        /* Encoder setup */
+        audioEncoder_->setOptions(args_);
+        audioEncoder_->openOutput(enc_name, "rtp", dest, NULL, false);
+        audioEncoder_->setIOContext(muxContext_);
+        audioEncoder_->startIO();
+    } catch (const VideoEncoderException &e) {
+        SFL_ERR("%s", e.what());
+        return false;
+    }
+
+    std::string sdp;
+    audioEncoder_->print_sdp(sdp);
+    SFL_WARN("\n%s", sdp.c_str());
+
+    return true;
+}
+
+void
+AudioSender::cleanup()
+{
+    audioEncoder_.reset();
+    muxContext_.reset();
+}
+
+void
+AudioSender::process()
+{
+    auto mainBuffFormat = Manager::instance().getRingBufferPool().getInternalAudioFormat();
+    double resampleFactor = mainBuffFormat.sample_rate / (double) format_.sample_rate;
+
+    // number of samples, at the internal (ring buffer) rate, that make up one packet
+    const size_t samplesToGet = resampleFactor * secondsPerPacket_ * format_.sample_rate;
+
+    if (Manager::instance().getRingBufferPool().availableForGet(id_) < samplesToGet)
+        return;
+
+    // FIXME
+    AudioBuffer micData(samplesToGet, mainBuffFormat);
+
+    const size_t samples = Manager::instance().getRingBufferPool().getData(micData, id_);
+    micData.setChannelNum(format_.nb_channels, true); // down/upmix as needed
+
+    if (samples != samplesToGet) {
+        SFL_ERR("Asked for %zu samples from bindings on call '%s', got %zu",
+                samplesToGet, id_.c_str(), samples); // both counts are size_t, hence %zu
+        return;
+    }
+
+    if (mainBuffFormat.sample_rate != format_.sample_rate)
+    {
+        if (not resampler_) {
+            SFL_DBG("Creating audio resampler");
+            resampler_.reset(new Resampler(format_));
+        }
+        AudioBuffer resampledData(samplesToGet, format_);
+        resampler_->resample(micData, resampledData);
+        if (audioEncoder_->encode_audio(resampledData) < 0)
+            SFL_ERR("encoding failed");
+    } else {
+        if (audioEncoder_->encode_audio(micData) < 0)
+            SFL_ERR("encoding failed");
+    }
+
+    const int millisecondsPerPacket = secondsPerPacket_ * 1000;
+    if (waitForDataEncode(std::chrono::milliseconds(millisecondsPerPacket))) {
+        // Data available !
+    }
+}
+
+bool
+AudioSender::waitForDataEncode(const std::chrono::milliseconds& max_wait) const
+{
+    auto& mainBuffer = Manager::instance().getRingBufferPool();
+    auto mainBuffFormat = mainBuffer.getInternalAudioFormat();
+    auto resampleFactor = (double) mainBuffFormat.sample_rate / format_.sample_rate;
+    const size_t samplesToGet = resampleFactor * secondsPerPacket_ * format_.sample_rate;
+
+    return mainBuffer.waitForDataAvailable(id_, samplesToGet, max_wait);
+}
+
+class AudioReceiveThread
+{
+    public:
+        AudioReceiveThread(const std::string &id, const std::string &sdp);
+        ~AudioReceiveThread();
+        void addIOContext(sfl_video::SocketPair &socketPair);
+        void startLoop();
+
+    private:
+        NON_COPYABLE(AudioReceiveThread);
+
+        static constexpr int SDP_BUFFER_SIZE = 8192;
+        static constexpr auto SDP_FILENAME = "dummyFilename";
+
+        std::map<std::string, std::string> args_;
+
+        static int interruptCb(void *ctx);
+        static int readFunction(void *opaque, uint8_t *buf, int buf_size);
+
+        void openDecoder();
+        bool decodeFrame();
+
+        /*-----------------------------------------------------------------*/
+        /* These variables should be used in thread (i.e. process()) only!
*/ + /*-----------------------------------------------------------------*/ + const std::string id_; + std::istringstream stream_; + std::unique_ptr<sfl_video::VideoDecoder> audioDecoder_; + std::unique_ptr<sfl_video::VideoIOHandle> sdpContext_; + std::unique_ptr<sfl_video::VideoIOHandle> demuxContext_; + std::shared_ptr<sfl::RingBuffer> ringbuffer_; + + ThreadLoop loop_; + bool setup(); + void process(); + void cleanup(); +}; + +AudioReceiveThread::AudioReceiveThread(const std::string& id, const std::string& sdp) + : id_(id) + , stream_(sdp) + , sdpContext_(new VideoIOHandle(SDP_BUFFER_SIZE, false, &readFunction, 0, 0, this)) + , loop_(std::bind(&AudioReceiveThread::setup, this), + std::bind(&AudioReceiveThread::process, this), + std::bind(&AudioReceiveThread::cleanup, this)) +{} + +AudioReceiveThread::~AudioReceiveThread() +{ + loop_.join(); +} + + +bool +AudioReceiveThread::setup() +{ + audioDecoder_.reset(new sfl_video::VideoDecoder()); + audioDecoder_->setInterruptCallback(interruptCb, this); + // custom_io so the SDP demuxer will not open any UDP connections + args_["sdp_flags"] = "custom_io"; + EXIT_IF_FAIL(not stream_.str().empty(), "No SDP loaded"); + audioDecoder_->setIOContext(sdpContext_.get()); + audioDecoder_->setOptions(args_); + EXIT_IF_FAIL(not audioDecoder_->openInput(SDP_FILENAME, "sdp"), + "Could not open input \"%s\"", SDP_FILENAME); + // Now replace our custom AVIOContext with one that will read packets + audioDecoder_->setIOContext(demuxContext_.get()); + + EXIT_IF_FAIL(not audioDecoder_->setupFromAudioData(), + "decoder IO startup failed"); + + ringbuffer_ = Manager::instance().getRingBufferPool().getRingBuffer(id_); + return true; +} + +void +AudioReceiveThread::process() +{ + sfl::AudioFormat mainBuffFormat = Manager::instance().getRingBufferPool().getInternalAudioFormat(); + std::unique_ptr<AVFrame, void(*)(AVFrame*)> decodedFrame(av_frame_alloc(), [](AVFrame*p){av_frame_free(&p);}); + + switch 
(audioDecoder_->decode_audio(decodedFrame.get())) {
+
+        case sfl_video::VideoDecoder::Status::FrameFinished:
+            audioDecoder_->writeToRingBuffer(decodedFrame.get(), *ringbuffer_,
+                                             mainBuffFormat);
+            return;
+
+        case sfl_video::VideoDecoder::Status::DecodeError:
+            SFL_WARN("decoding failure, trying to reset decoder...");
+            if (not setup()) {
+                SFL_ERR("fatal error, rx thread re-setup failed");
+                loop_.stop();
+                break;
+            }
+            if (not audioDecoder_->setupFromAudioData()) {
+                SFL_ERR("fatal error, a-decoder setup failed");
+                loop_.stop();
+                break;
+            }
+            break;
+
+        case sfl_video::VideoDecoder::Status::ReadError:
+            SFL_ERR("fatal error, read failed");
+            loop_.stop();
+            break;
+
+        default:
+            break;
+    }
+}
+
+void
+AudioReceiveThread::cleanup()
+{
+    audioDecoder_.reset();
+    demuxContext_.reset();
+}
+
+int
+AudioReceiveThread::readFunction(void* opaque, uint8_t* buf, int buf_size)
+{
+    std::istream& is = static_cast<AudioReceiveThread*>(opaque)->stream_;
+    is.read(reinterpret_cast<char*>(buf), buf_size);
+    return is.gcount();
+}
+
+// This callback is used by libav internally to break out of blocking calls
+int
+AudioReceiveThread::interruptCb(void* data)
+{
+    auto context = static_cast<AudioReceiveThread*>(data);
+    return not context->loop_.isRunning();
+}
+
+void
+AudioReceiveThread::addIOContext(SocketPair& socketPair)
+{
+    demuxContext_.reset(socketPair.createIOContext());
+}
+
+void
+AudioReceiveThread::startLoop()
+{
+    loop_.start();
+}
+
+AVFormatRtpSession::AVFormatRtpSession(const std::string& id,
+                                       const std::map<std::string, std::string>& txArgs)
+    : id_(id), txArgs_(txArgs), sending_(false), receiving_(false) // start() reads both flags; updateSDP() only assigns them on some paths
+{
+    // don't move this into the initializer list or Cthulus will emerge
+    ringbuffer_ = Manager::instance().getRingBufferPool().createRingBuffer(id_);
+}
+
+AVFormatRtpSession::~AVFormatRtpSession()
+{
+    stop();
+}
+
+void
+AVFormatRtpSession::updateSDP(const Sdp& sdp)
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+    std::string desc(sdp.getIncomingAudioDescription());
+
+    // if
port has changed + if (not desc.empty() and desc != receivingSDP_) { + receivingSDP_ = desc; + SFL_WARN("Updated incoming SDP to:\n%s", + receivingSDP_.c_str()); + } + + if (desc.empty()) { + SFL_DBG("Audio is inactive"); + receiving_ = false; + sending_ = false; + } else if (desc.find("sendrecv") != std::string::npos) { + SFL_DBG("Sending and receiving audio"); + receiving_ = true; + sending_ = true; + } else if (desc.find("inactive") != std::string::npos) { + SFL_DBG("Audio is inactive"); + receiving_ = false; + sending_ = false; + } else if (desc.find("sendonly") != std::string::npos) { + SFL_DBG("Receiving audio disabled, audio set to sendonly"); + receiving_ = false; + sending_ = true; + } else if (desc.find("recvonly") != std::string::npos) { + SFL_DBG("Sending audio disabled, audio set to recvonly"); + sending_ = false; + receiving_ = true; + } + // even if it says sendrecv or recvonly, our peer may disable audio by + // setting the port to 0 + if (desc.find("m=audio 0") != std::string::npos) { + SFL_DBG("Receiving audio disabled, port was set to 0"); + receiving_ = false; + } + + if (sending_) + sending_ = sdp.getOutgoingAudioSettings(txArgs_); +} + +void +AVFormatRtpSession::updateDestination(const std::string& destination, + unsigned int port) +{ + std::lock_guard<std::recursive_mutex> lock(mutex_); + + if (destination.empty()) { + SFL_WARN("Destination is empty, ignoring"); + return; + } + + std::stringstream tmp; + tmp << "rtp://" << destination << ":" << port; + + // if destination has changed + if (tmp.str() != txArgs_["destination"]) { + if (sender_) { + SFL_WARN("Audio is already being sent"); + return; + } + txArgs_["destination"] = tmp.str(); + SFL_DBG("updated dest to %s", txArgs_["destination"].c_str()); + } + + if (port == 0) { + SFL_DBG("Sending audio disabled, port was set to 0"); + sending_ = false; + } +} + +void +AVFormatRtpSession::startSender() +{ + if (not sending_) + return; + + if (sender_) + SFL_WARN("Restarting audio sender"); + + 
try {
+        sender_.reset(new AudioSender(id_, txArgs_, *socketPair_));
+    } catch (const VideoEncoderException &e) {
+        SFL_ERR("%s", e.what());
+        sending_ = false;
+    }
+}
+
+void
+AVFormatRtpSession::startReceiver()
+{
+    if (receiving_) {
+        if (receiveThread_)
+            SFL_WARN("restarting audio receiver");
+        receiveThread_.reset(new AudioReceiveThread(id_, receivingSDP_)); // replaces any previous receiver
+        receiveThread_->addIOContext(*socketPair_);
+        receiveThread_->startLoop();
+    } else {
+        SFL_DBG("Audio receiving disabled");
+        receiveThread_.reset();
+    }
+}
+
+void
+AVFormatRtpSession::start(int localPort)
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+
+    if (not sending_ and not receiving_) {
+        stop();
+        return;
+    }
+
+    try {
+        socketPair_.reset(new SocketPair(txArgs_["destination"].c_str(), localPort));
+    } catch (const std::runtime_error &e) {
+        SFL_ERR("Socket creation failed on port %d: %s", localPort, e.what());
+        return;
+    }
+
+    startSender();
+    startReceiver();
+}
+
+void
+AVFormatRtpSession::stop()
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+
+    if (socketPair_)
+        socketPair_->interrupt();
+
+    receiveThread_.reset();
+    sender_.reset();
+    socketPair_.reset();
+}
+
+} // end namespace sfl
diff --git a/daemon/src/audio/audiortp/avformat_rtp_session.h b/daemon/src/audio/audiortp/avformat_rtp_session.h
new file mode 100644
index 0000000000000000000000000000000000000000..de1e1c9ffb5af6c01b88a1f43a73a7ae84b4e79d
--- /dev/null
+++ b/daemon/src/audio/audiortp/avformat_rtp_session.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2014 Savoir-Faire Linux Inc.
+ * Author: Tristan Matthews <tristan.matthews@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Additional permission under GNU GPL version 3 section 7: + * + * If you modify this program, or any covered work, by linking or + * combining it with the OpenSSL project's OpenSSL library (or a + * modified version of that library), containing parts covered by the + * terms of the OpenSSL or SSLeay licenses, Savoir-Faire Linux Inc. + * grants you additional permission to convey the resulting work. + * Corresponding Source for a non-source form of such a combination + * shall include the source code for the parts of OpenSSL used as well + * as that of the covered work. 
+ */ + +#ifndef AVFORMAT_RTP_SESSION_H__ +#define AVFORMAT_RTP_SESSION_H__ + +#include "threadloop.h" +#include "audio/audiobuffer.h" +#include "noncopyable.h" + +#include <map> +#include <string> +#include <memory> +#include <mutex> + +namespace sfl_video { +class SocketPair; +class VideoEncoder; +} + +class Sdp; +class ThreadLoop; + +namespace sfl { + +class RingBuffer; +class Resampler; +class AudioSender; +class AudioReceiveThread; + +class AVFormatRtpSession { + public: + AVFormatRtpSession(const std::string& id, + const std::map<std::string, std::string>& txArgs); + ~AVFormatRtpSession(); + + void start(int localPort); + void stop(); + void updateDestination(const std::string& destination, unsigned int port); + void updateSDP(const Sdp &sdp); + + private: + NON_COPYABLE(AVFormatRtpSession); + + void startSender(); + void startReceiver(); + + std::string id_; + std::map<std::string, std::string> txArgs_; + std::string receivingSDP_; + std::unique_ptr<sfl_video::SocketPair> socketPair_; + std::unique_ptr<AudioSender> sender_; + std::unique_ptr<AudioReceiveThread> receiveThread_; + std::shared_ptr<sfl::RingBuffer> ringbuffer_; + std::recursive_mutex mutex_; + bool sending_; + bool receiving_; +}; + +} + +#endif // __AVFORMAT_RTP_SESSION_H__ diff --git a/daemon/src/client/callmanager.cpp b/daemon/src/client/callmanager.cpp index f5b8ae8ac744355c3e7211f176268c2be7cf8177..feea893e1a409fdc30e3ac5af8288d534b6127f2 100644 --- a/daemon/src/client/callmanager.cpp +++ b/daemon/src/client/callmanager.cpp @@ -278,7 +278,7 @@ CallManager::startTone(int32_t start, int32_t type) // for conferencing in order to get // the right pointer for the given // callID. 
-#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP sfl::AudioZrtpSession * CallManager::getAudioZrtpSession(const std::string& callID) { @@ -299,7 +299,7 @@ CallManager::getAudioZrtpSession(const std::string& callID) void CallManager::setSASVerified(const std::string& callID) { -#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP try { sfl::AudioZrtpSession * zSession; zSession = getAudioZrtpSession(callID); @@ -314,7 +314,7 @@ CallManager::setSASVerified(const std::string& callID) void CallManager::resetSASVerified(const std::string& callID) { -#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP try { sfl::AudioZrtpSession * zSession; zSession = getAudioZrtpSession(callID); @@ -329,7 +329,7 @@ CallManager::resetSASVerified(const std::string& callID) void CallManager::setConfirmGoClear(const std::string& callID) { -#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP try { sfl::AudioZrtpSession * zSession; zSession = getAudioZrtpSession(callID); @@ -344,7 +344,7 @@ CallManager::setConfirmGoClear(const std::string& callID) void CallManager::requestGoClear(const std::string& callID) { -#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP try { sfl::AudioZrtpSession * zSession; zSession = getAudioZrtpSession(callID); @@ -359,7 +359,7 @@ CallManager::requestGoClear(const std::string& callID) void CallManager::acceptEnrollment(const std::string& callID, bool accepted) { -#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP try { sfl::AudioZrtpSession * zSession; zSession = getAudioZrtpSession(callID); diff --git a/daemon/src/client/callmanager.h b/daemon/src/client/callmanager.h index 2d65a712695755dd016de1b695e653e64ceb2454..81b345098698b015eb655fbf9ff958f94e4b6818 100644 --- a/daemon/src/client/callmanager.h +++ b/daemon/src/client/callmanager.h @@ -156,7 +156,7 @@ class CallManager private: -#if HAVE_ZRTP +#if USE_CCRTP && HAVE_ZRTP sfl::AudioZrtpSession * getAudioZrtpSession(const std::string& callID); #endif diff --git a/daemon/src/ringdht/ringaccount.cpp b/daemon/src/ringdht/ringaccount.cpp index 
94e768119a04945abaeb362fac279ef0997f348d..940219afbfe98e507e0fbe211c8235b7f1163f83 100644
--- a/daemon/src/ringdht/ringaccount.cpp
+++ b/daemon/src/ringdht/ringaccount.cpp
@@ -180,6 +180,7 @@ RingAccount::createOutgoingCall(const std::shared_ptr<SIPCall>& call, const std:
     std::vector<sfl::AudioCodec *> audioCodecs;
     audioCodecs.push_back(ac);
 
+#if USE_CCRTP
     try {
         call->getAudioRtp().initConfig();
         call->getAudioRtp().initSession();
@@ -192,6 +193,7 @@ RingAccount::createOutgoingCall(const std::shared_ptr<SIPCall>& call, const std:
     } catch (...) {
         throw VoipLinkException("Could not start rtp session for early media");
     }
+#endif
 
     // Building the local SDP offer
     auto& sdp = call->getSDP();
diff --git a/daemon/src/sip/sdp.cpp b/daemon/src/sip/sdp.cpp
index a9d4c84886b8a5ee2955e2136910856178adf18d..509bff3dfe197d7427635f3bd634e527c8e6ad18 100644
--- a/daemon/src/sip/sdp.cpp
+++ b/daemon/src/sip/sdp.cpp
@@ -289,7 +289,8 @@ Sdp::setMediaDescriptorLines(bool audio)
             enc_name = codec->getMimeSubtype();
             clock_rate = codec->getSDPClockRate();
             channels = codec->getSDPChannels();
-            // G722 require G722/8000 media description even if it is 16000 codec
+            // G722 requires G722/8000 media description even though it's @ 16000 Hz
+            // See http://tools.ietf.org/html/rfc3551#section-4.5.2
             if (codec->getPayloadType () == 9)
                 clock_rate = 8000;
         } else {
@@ -656,6 +657,47 @@ string Sdp::getIncomingVideoDescription() const
     return sessionStr;
 }
 
+// FIXME:
+// Here we filter out parts of the SDP that libavformat doesn't need to
+// know about...we should probably give the audio decoder thread the original
+// SDP and deal with the streams properly at that level
+std::string Sdp::getIncomingAudioDescription() const
+{
+    pjmedia_sdp_session *audioSession = pjmedia_sdp_session_clone(memPool_, activeLocalSession_);
+    if (!audioSession) {
+        SFL_ERR("Could not clone SDP");
+        return "";
+    }
+
+    // deactivate non-audio media
+    bool hasAudio = false;
+    for (unsigned i = 0; i < audioSession->media_count; i++)
+        if (pj_stricmp2(&audioSession->media[i]->desc.media, "audio")) {
+            if (pjmedia_sdp_media_deactivate(memPool_, audioSession->media[i]) != PJ_SUCCESS)
+                SFL_ERR("Could not deactivate media");
+        } else {
+            hasAudio = true;
+        }
+
+    if (not hasAudio) {
+        SFL_DBG("No audio present in active local SDP");
+        return "";
+    }
+
+    char buffer[4096];
+    const int size = pjmedia_sdp_print(audioSession, buffer, sizeof(buffer)); // negative on failure
+    std::string sessionStr(buffer, size > 0 ? std::min<size_t>(size, sizeof(buffer)) : 0);
+
+    // FIXME: find a way to get rid of the "m=video..." line with PJSIP
+    const size_t videoPos = sessionStr.find("m=video");
+    const size_t newline2 = sessionStr.find('\n', videoPos);
+    const size_t newline1 = sessionStr.rfind('\n', videoPos);
+    // only strip the line when an "m=video" line is actually present
+    if (videoPos != std::string::npos and newline1 != std::string::npos)
+        sessionStr.erase(newline1, newline2 - newline1);
+    return sessionStr;
+}
+
 std::string Sdp::getOutgoingVideoCodec() const
 {
     string str("a=rtpmap:");
@@ -669,6 +711,64 @@ std::string Sdp::getOutgoingVideoCodec() const
     return string(codec_buf);
 }
 
+// FIXME: merge these into a single parsing function, lot of repetition here
+std::string Sdp::getOutgoingAudioCodec() const
+{
+    string str("a=rtpmap:");
+    std::stringstream os;
+    os << getOutgoingAudioPayload();
+    str += os.str();
+    string aCodecLine(getLineFromSession(activeRemoteSession_, str));
+    char codec_buf[32];
+    codec_buf[0] = '\0';
+    sscanf(aCodecLine.c_str(), "a=rtpmap:%*d %31[^/]", codec_buf);
+    return string(codec_buf);
+}
+
+std::string Sdp::getOutgoingAudioRate() const
+{
+    // e.g.
opus/48000/2, g722/16000 + string str("a=rtpmap:"); + std::stringstream os; + os << getOutgoingAudioPayload(); + str += os.str(); + string aCodecLine(getLineFromSession(activeRemoteSession_, str)); + const auto pos = aCodecLine.find_first_of("/"); + if (pos < aCodecLine.size() - 1) { + const auto tmp = aCodecLine.substr(pos + 1); + // strip channel if present + const auto end = tmp.find_first_of("/"); + if (end != string::npos) + return tmp.substr(0, end); + else + return tmp; + } else { + const char *DEFAULT_RATE = "8000"; + SFL_ERR("No rate found in SDP, defaulting to %s", DEFAULT_RATE); + return DEFAULT_RATE; + } +} + +std::string Sdp::getOutgoingAudioChannels() const +{ + // e.g. opus/48000/2, g722/16000 + string str("a=rtpmap:"); + std::stringstream os; + os << getOutgoingAudioPayload(); + str += os.str(); + string aCodecLine(getLineFromSession(activeRemoteSession_, str)); + + const auto nb_slashes = std::count(aCodecLine.begin(), aCodecLine.end(), '/'); + const auto pos = aCodecLine.find_last_of("/"); + if (nb_slashes > 1 and pos < aCodecLine.size() - 1) { + return aCodecLine.substr(pos + 1); + } else { + const char *DEFAULT_CHANNELS = "1"; + SFL_ERR("No channels found in SDP, defaulting to %s", DEFAULT_CHANNELS); + return DEFAULT_CHANNELS; + } +} + static vector<map<string, string> >::const_iterator findCodecInList(const vector<map<string, string> > &codecs, const string &codec) { @@ -702,6 +802,16 @@ Sdp::getOutgoingVideoPayload() const return payload_num; } +int +Sdp::getOutgoingAudioPayload() const +{ + string audioLine(getLineFromSession(activeRemoteSession_, "m=audio")); + int payload_num; + if (sscanf(audioLine.c_str(), "m=audio %*d %*s %d", &payload_num) != 1) + payload_num = 0; + return payload_num; +} + void Sdp::getProfileLevelID(const pjmedia_sdp_session *session, std::string &profile, int payload) const @@ -870,3 +980,47 @@ bool Sdp::getOutgoingVideoSettings(map<string, string> &args) const #endif return false; } + +#ifndef USE_CCRTP +bool 
Sdp::getOutgoingAudioSettings(map<string, string> &args) const +{ + string codec(getOutgoingAudioCodec()); + if (not codec.empty()) { + const string encoder(libav_utils::encodersMap()[codec]); + if (encoder.empty()) { + SFL_DBG("Couldn't find encoder for \"%s\"\n", codec.c_str()); + return false; + } else { + args["codec"] = encoder; + const int payload = getOutgoingAudioPayload(); + std::ostringstream os; + os << payload; + args["payload_type"] = os.str(); + } + + const string rate(getOutgoingAudioRate()); + if (rate.empty()) { + SFL_DBG("Couldn't find rate for \"%s\"\n", codec.c_str()); + return false; + } else { + // G722 requires G722/8000 media description even though it's @ 16000 Hz + // See http://tools.ietf.org/html/rfc3551#section-4.5.2 + if (codec == "G722") + args["sample_rate"] = "16000"; + else + args["sample_rate"] = rate; + } + + const string channels(getOutgoingAudioChannels()); + if (channels.empty()) { + SFL_DBG("Couldn't find channels for \"%s\"\n", codec.c_str()); + return false; + } else { + args["channels"] = channels; + } + + return true; + } + return false; +} +#endif diff --git a/daemon/src/sip/sdp.h b/daemon/src/sip/sdp.h index 1d28d43b814556e2a61ba267b09c4ea8f1a2b7af..eae0a82031f4d919cc374216d6dbd28f81711ee3 100644 --- a/daemon/src/sip/sdp.h +++ b/daemon/src/sip/sdp.h @@ -118,6 +118,12 @@ class Sdp { */ std::string getIncomingVideoDescription() const; + /** + * Returns a string version of the negotiated SDP fields which pertain + * to audio. + */ + std::string getIncomingAudioDescription() const; + /* * On building an invite outside a dialog, build the local offer and create the * SDP negotiator instance with it. 
@@ -217,6 +223,10 @@ class Sdp { return localVideoDataPort_; } + unsigned int getLocalAudioPort() const { + return localAudioDataPort_; + } + void addAttributeToLocalAudioMedia(const char *attr); void removeAttributeFromLocalAudioMedia(const char *attr); void addAttributeToLocalVideoMedia(const char *attr); @@ -258,6 +268,7 @@ class Sdp { // Sets @param settings with appropriate values and returns true if // we are sending video, false otherwise bool getOutgoingVideoSettings(std::map<std::string, std::string> &settings) const; + bool getOutgoingAudioSettings(std::map<std::string, std::string> &settings) const; private: NON_COPYABLE(Sdp); @@ -265,8 +276,12 @@ class Sdp { std::string getLineFromSession(const pjmedia_sdp_session *sess, const std::string &keyword) const; std::string getOutgoingVideoCodec() const; + std::string getOutgoingAudioCodec() const; + std::string getOutgoingAudioRate() const; + std::string getOutgoingAudioChannels() const; std::string getOutgoingVideoField(const std::string &codec, const char *key) const; int getOutgoingVideoPayload() const; + int getOutgoingAudioPayload() const; void getProfileLevelID(const pjmedia_sdp_session *session, std::string &dest, int payload) const; void updateRemoteIP(unsigned index); diff --git a/daemon/src/sip/sipaccount.cpp b/daemon/src/sip/sipaccount.cpp index e032de192350c1f3d5eb17cead9a83cebf2b0599..d5e1a86e87301aeb7c50994810ebcfe506915b5a 100644 --- a/daemon/src/sip/sipaccount.cpp +++ b/daemon/src/sip/sipaccount.cpp @@ -235,6 +235,7 @@ SIPAccount::newOutgoingCall(const std::string& id, const std::string& toUrl) std::vector<sfl::AudioCodec *> audioCodecs; audioCodecs.push_back(ac); +#if USE_CCRTP try { call->getAudioRtp().initConfig(); call->getAudioRtp().initSession(); @@ -247,6 +248,7 @@ SIPAccount::newOutgoingCall(const std::string& id, const std::string& toUrl) } catch (...) 
{ throw VoipLinkException("Could not start rtp session for early media"); } +#endif // Building the local SDP offer auto& sdp = call->getSDP(); diff --git a/daemon/src/sip/sipcall.cpp b/daemon/src/sip/sipcall.cpp index 977107ac26b8b63cacf6c593dc716105696ea664..2232cfec83cdecb826c12f27a9c0b1aa2ca7e934 100644 --- a/daemon/src/sip/sipcall.cpp +++ b/daemon/src/sip/sipcall.cpp @@ -43,7 +43,11 @@ #include "manager.h" #include "array_size.h" +#if USE_CCRTP #include "audio/audiortp/audio_rtp_factory.h" // for AudioRtpFactoryException +#else +#include "audio/audiortp/avformat_rtp_session.h" +#endif #if HAVE_INSTANT_MESSAGING #include "im/instant_messaging.h" @@ -73,7 +77,9 @@ static void dtmfSend(SIPCall &call, char code, const std::string &dtmf) { if (dtmf == SIPAccount::OVERRTP_STR) { +#if USE_CCRTP call.getAudioRtp().sendDtmfDigit(code); +#endif return; } else if (dtmf != SIPAccount::SIPINFO_STR) { SFL_WARN("Unknown DTMF type %s, defaulting to %s instead", @@ -100,7 +106,11 @@ dtmfSend(SIPCall &call, char code, const std::string &dtmf) SIPCall::SIPCall(SIPAccountBase& account, const std::string& id, Call::CallType type) : Call(account, id, type) +#if USE_CCRTP , audiortp_(this) +#else + , avformatrtp_(new sfl::AVFormatRtpSession(id, /* FIXME: These are video! 
*/ getSettings())) +#endif #ifdef SFL_VIDEO // The ID is used to associate video streams to calls , videortp_(id, getSettings()) @@ -130,7 +140,11 @@ void SIPCall::stopRtpIfCurrent() { if (Manager::instance().isCurrentCall(*this)) { +#if USE_CCRTP getAudioRtp().stop(); +#else + avformatrtp_->stop(); +#endif #ifdef SFL_VIDEO getVideoRtp().stop(); #endif @@ -242,6 +256,7 @@ SIPCall::sendSIPInfo(const char *const body, const char *const subtype) void SIPCall::updateSDPFromSTUN() { +#if USE_CCRTP auto& account = getSIPAccount(); std::vector<long> socketDescriptors(getAudioRtp().getSocketDescriptors()); @@ -258,6 +273,7 @@ SIPCall::updateSDPFromSTUN() } catch (const std::runtime_error &e) { SFL_ERR("%s", e.what()); } +#endif } void SIPCall::answer() @@ -359,7 +375,11 @@ SIPCall::refuse() if (!isIncoming() or getConnectionState() == Call::CONNECTED or !inv) return; +#if USE_CCRTP getAudioRtp().stop(); +#else + avformatrtp_->stop(); +#endif pjsip_tx_data *tdata; @@ -553,8 +573,12 @@ SIPCall::onhold() if (not setState(Call::HOLD)) return; +#if USE_CCRTP audiortp_.saveLocalContext(); audiortp_.stop(); +#else + avformatrtp_->stop(); +#endif #ifdef SFL_VIDEO videortp_.stop(); #endif @@ -576,6 +600,7 @@ SIPCall::onhold() void SIPCall::offhold() { +#if USE_CCRTP auto& account = getSIPAccount(); try { @@ -594,6 +619,7 @@ SIPCall::offhold() } catch (const sfl::AudioRtpFactoryException &) { throw VoipLinkException("Socket problem in offhold"); } +#endif } void @@ -631,6 +657,7 @@ SIPCall::internalOffHold(const std::function<void()> &SDPUpdateFunc) throw std::runtime_error("Could not instantiate any codecs"); } +#if USE_CCRTP audiortp_.initConfig(); audiortp_.initSession(); @@ -640,6 +667,7 @@ SIPCall::internalOffHold(const std::function<void()> &SDPUpdateFunc) audiortp_.restoreLocalContext(); audiortp_.initLocalCryptoInfoOnOffHold(); audiortp_.start(audioCodecs); +#endif sdp_->removeAttributeFromLocalAudioMedia("sendrecv"); sdp_->removeAttributeFromLocalAudioMedia("sendonly"); 
diff --git a/daemon/src/sip/sipcall.h b/daemon/src/sip/sipcall.h index eb141666a1b85f6399cac3f072028010e6055afa..c20bdf6f60843edda9a1e3f9e7ff677233bb9d54 100644 --- a/daemon/src/sip/sipcall.h +++ b/daemon/src/sip/sipcall.h @@ -39,7 +39,9 @@ #endif #include "call.h" +#if USE_CCRTP #include "audio/audiortp/audio_rtp_factory.h" +#endif #ifdef SFL_VIDEO #include "video/video_rtp_session.h" #endif @@ -59,6 +61,10 @@ class Sdp; class SIPAccountBase; class SipTransport; +namespace sfl { +class AVFormatRtpSession; +} + /** * @file sipcall.h * @brief SIPCall are SIP implementation of a normal Call @@ -90,18 +96,28 @@ class SIPCall : public Call return *sdp_; } +#if USE_CCRTP /** * Returns a pointer to the AudioRtp object */ - sfl::AudioRtpFactory & getAudioRtp() { + sfl::AudioRtpFactory& getAudioRtp() { return audiortp_; } +#else + + /** + * Returns a pointer to the AVFormatRtpSession object + */ + sfl::AVFormatRtpSession& getAVFormatRTP() const { + return *avformatrtp_; + } +#endif #ifdef SFL_VIDEO /** * Returns a pointer to the VideoRtp object */ - sfl_video::VideoRtpSession &getVideoRtp () { + sfl_video::VideoRtpSession& getVideoRtp () { return videortp_; } #endif @@ -196,10 +212,14 @@ class SIPCall : public Call int SIPSessionReinvite(); +#if USE_CCRTP /** * Audio Rtp Session factory */ sfl::AudioRtpFactory audiortp_; +#else + std::unique_ptr<sfl::AVFormatRtpSession> avformatrtp_; +#endif #ifdef SFL_VIDEO /** diff --git a/daemon/src/sip/sipvoiplink.cpp b/daemon/src/sip/sipvoiplink.cpp index 9ee2577960892614c3e53e61ed81359a2620e685..2be08be206dfbf7af2355350f35fc559d151c48a 100644 --- a/daemon/src/sip/sipvoiplink.cpp +++ b/daemon/src/sip/sipvoiplink.cpp @@ -65,6 +65,10 @@ #include "audio/audiolayer.h" +#ifndef USE_CCRTP +#include "audio/audiortp/avformat_rtp_session.h" +#endif + #ifdef SFL_VIDEO #include "video/video_rtp_session.h" #include "client/videomanager.h" @@ -323,19 +327,24 @@ transaction_request_cb(pjsip_rx_data *rdata) call->initRecFilename(peerNumber); 
call->setCallMediaLocal(addrToUse); call->getSDP().setPublishedIP(addrSdp); +#if USE_CCRTP call->getAudioRtp().initConfig(); +#endif call->setTransport(transport); +#if USE_CCRTP try { call->getAudioRtp().initSession(); } catch (const ost::Socket::Error &err) { SFL_ERR("AudioRtp socket error"); return PJ_FALSE; } +#endif if (account->isStunEnabled()) call->updateSDPFromSTUN(); +#if USE_CCRTP if (body and body->len > 0 and call->getAudioRtp().isSdesEnabled()) { std::string sdpOffer(static_cast<const char*>(body->data), body->len); size_t start = sdpOffer.find("a=crypto:"); @@ -367,6 +376,7 @@ transaction_request_cb(pjsip_rx_data *rdata) #endif } } +#endif call->getSDP().receiveOffer(r_sdp, account->getActiveAudioCodecs(), account->getActiveVideoCodecs()); @@ -379,7 +389,9 @@ transaction_request_cb(pjsip_rx_data *rdata) std::vector<sfl::AudioCodec *> audioCodecs; audioCodecs.push_back(ac); +#if USE_CCRTP call->getAudioRtp().start(audioCodecs); +#endif pjsip_dialog *dialog = 0; @@ -1020,14 +1032,22 @@ sdp_media_update_cb(pjsip_inv_session *inv, pj_status_t status) // Update connection information sdp.setMediaTransportInfoFromRemoteSdp(); +#if USE_CCRTP auto& audioRTP = call->getAudioRtp(); try { audioRTP.updateDestinationIpAddress(); } catch (const AudioRtpFactoryException &e) { SFL_ERR("%s", e.what()); } - audioRTP.setDtmfPayloadType(sdp.getTelephoneEventType()); +#else + call->getAVFormatRTP().updateSDP(sdp); + call->getAVFormatRTP().updateDestination(sdp.getRemoteIP(), sdp.getRemoteAudioPort()); + auto localAudioPort = sdp.getLocalAudioPort(); + if (!localAudioPort) + localAudioPort = sdp.getRemoteAudioPort(); + call->getAVFormatRTP().start(localAudioPort); +#endif #ifdef SFL_VIDEO auto& videoRTP = call->getVideoRtp(); @@ -1039,9 +1059,9 @@ sdp_media_update_cb(pjsip_inv_session *inv, pj_status_t status) // Get the crypto attribute containing srtp's cryptographic context (keys, cipher) CryptoOffer crypto_offer; - call->getSDP().getRemoteSdpCryptoFromOffer(remoteSDP, 
crypto_offer); + sdp.getRemoteSdpCryptoFromOffer(remoteSDP, crypto_offer); -#if HAVE_SDES +#if USE_CCRTP && HAVE_SDES bool nego_success = false; if (!crypto_offer.empty()) { @@ -1113,8 +1133,10 @@ sdp_media_update_cb(pjsip_inv_session *inv, pj_status_t status) } } +#if USE_CCRTP if (not audioCodecs.empty()) call->getAudioRtp().updateSessionMedia(audioCodecs); +#endif } catch (const SdpException &e) { SFL_ERR("%s", e.what()); } catch (const std::exception &rtpException) { diff --git a/daemon/src/video/libav_deps.h b/daemon/src/video/libav_deps.h index 2f9e9af8ab81673c26993c55f99f11f2a2a506e7..21402b956ffde1be08dbdde35b24d9d771e9c202 100644 --- a/daemon/src/video/libav_deps.h +++ b/daemon/src/video/libav_deps.h @@ -66,6 +66,7 @@ extern "C" { #endif #include <libavutil/pixdesc.h> #include <libavutil/opt.h> +#include <libavutil/channel_layout.h> #include <libavutil/mathematics.h> // for av_rescale_q (old libav support) #include <libavutil/imgutils.h> #include <libavutil/intreadwrite.h> @@ -96,4 +97,17 @@ static inline const AVPixFmtDescriptor *av_pix_fmt_desc_get(enum AVPixelFormat p #define avcodec_free_frame(x) av_freep(x) #endif +// Especially for Fedora < 20 and UBUNTU < 14.10 +#define USE_OLD_AVU ! 
LIBAVUTIL_VERSION_CHECK(52, 8, 0, 19, 100) + +#if USE_OLD_AVU +#define av_frame_alloc avcodec_alloc_frame +#define av_frame_free avcodec_free_frame +#define av_frame_unref avcodec_get_frame_defaults +#define av_frame_get_buffer(x, y) avpicture_alloc((AVPicture *)(x), \ + (AVPixelFormat)(x)->format, \ + (x)->width, (x)->height) +#endif + + #endif // __LIBAV_DEPS_H__ diff --git a/daemon/src/video/libav_utils.cpp b/daemon/src/video/libav_utils.cpp index bb1fb5603d7e806fa665f0b669a30771da510a03..8f82c4f28828de40755e0c87c8dacf7a30db0e68 100644 --- a/daemon/src/video/libav_utils.cpp +++ b/daemon/src/video/libav_utils.cpp @@ -125,6 +125,12 @@ static void init_once() encoders_["VP8"] = "libvpx"; encoders_["MP4V-ES"] = "mpeg4"; + encoders_["PCMA"] = "pcm_alaw"; + encoders_["PCMU"] = "pcm_mulaw"; + encoders_["opus"] = "libopus"; + encoders_["G722"] = "g722"; + encoders_["speex"] = "libspeex"; + //FFmpeg needs to be modified to allow us to send configuration //inline, with CODEC_FLAG_GLOBAL_HEADER //encoders["THEORA"] = "libtheora"; diff --git a/daemon/src/video/video_base.cpp b/daemon/src/video/video_base.cpp index e75cc2356aa7a4d38c9bf3b098f12b2cd924a229..e58d457e844bb16fdbecd90850802d6281374f39 100644 --- a/daemon/src/video/video_base.cpp +++ b/daemon/src/video/video_base.cpp @@ -34,18 +34,6 @@ #include "video_base.h" #include "logger.h" -// Especially for Fedora < 20 and UBUNTU < 14.10 -#define USE_OLD_AVU ! 
LIBAVUTIL_VERSION_CHECK(52, 8, 0, 19, 100) - -#if USE_OLD_AVU -#define av_frame_alloc avcodec_alloc_frame -#define av_frame_free avcodec_free_frame -#define av_frame_unref avcodec_get_frame_defaults -#define av_frame_get_buffer(x, y) avpicture_alloc((AVPicture *)(x), \ - (AVPixelFormat)(x)->format, \ - (x)->width, (x)->height) -#endif - namespace sfl_video { /*=== VideoPacket ===========================================================*/ diff --git a/daemon/src/video/video_decoder.cpp b/daemon/src/video/video_decoder.cpp index faa29befbc30071fcc9c6fa61baa844796b33dd7..b541495630a1ff78b1ef04bd349b71a5bd6ff8af 100644 --- a/daemon/src/video/video_decoder.cpp +++ b/daemon/src/video/video_decoder.cpp @@ -32,6 +32,9 @@ // libav_deps.h must be included first #include "libav_deps.h" #include "video_decoder.h" +#include "audio/audiobuffer.h" +#include "audio/ringbuffer.h" +#include "audio/resampler.h" #include "logger.h" #include <iostream> @@ -116,6 +119,82 @@ void VideoDecoder::setInterruptCallback(int (*cb)(void*), void *opaque) void VideoDecoder::setIOContext(VideoIOHandle *ioctx) { inputCtx_->pb = ioctx->getContext(); } +int VideoDecoder::setupFromAudioData() +{ + int ret; + + if (decoderCtx_) + avcodec_close(decoderCtx_); + + // Increase analyze time to solve synchronization issues between callers. 
+ static const unsigned MAX_ANALYZE_DURATION = 30; // time in seconds + + inputCtx_->max_analyze_duration = MAX_ANALYZE_DURATION * AV_TIME_BASE; + + SFL_DBG("Finding stream info"); +#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(53, 8, 0) + ret = av_find_stream_info(inputCtx_); +#else + ret = avformat_find_stream_info(inputCtx_, NULL); +#endif + + if (ret < 0) { + // workaround for this bug: + // http://patches.libav.org/patch/22541/ + if (ret == -1) + ret = AVERROR_INVALIDDATA; + char errBuf[64] = {0}; + // print nothing for unknown errors + if (av_strerror(ret, errBuf, sizeof errBuf) < 0) + errBuf[0] = '\0'; + + // always fail here + SFL_ERR("Could not find stream info: %s", errBuf); + return -1; + } + + // find the first audio stream from the input + for (size_t i = 0; streamIndex_ == -1 && i < inputCtx_->nb_streams; ++i) + if (inputCtx_->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) + streamIndex_ = i; + + if (streamIndex_ == -1) { + SFL_ERR("Could not find audio stream"); + return -1; + } + + // Get a pointer to the codec context for the video stream + decoderCtx_ = inputCtx_->streams[streamIndex_]->codec; + if (decoderCtx_ == 0) { + SFL_ERR("Decoder context is NULL"); + return -1; + } + + // find the decoder for the video stream + inputDecoder_ = avcodec_find_decoder(decoderCtx_->codec_id); + if (!inputDecoder_) { + SFL_ERR("Unsupported codec"); + return -1; + } + + decoderCtx_->thread_count = 1; + if (emulateRate_) { + SFL_DBG("Using framerate emulation"); + startTime_ = av_gettime(); + } + +#if LIBAVCODEC_VERSION_MAJOR >= 55 + decoderCtx_->refcounted_frames = 1; +#endif + ret = avcodec_open2(decoderCtx_, inputDecoder_, NULL); + if (ret) { + SFL_ERR("Could not open codec"); + return -1; + } + + return 0; +} + int VideoDecoder::setupFromVideoData() { int ret; @@ -248,6 +327,63 @@ VideoDecoder::decode(VideoFrame& result, VideoPacket& video_packet) return Status::Success; } +VideoDecoder::Status +VideoDecoder::decode_audio(AVFrame *decoded_frame) +{ + 
AVPacket inpacket; + memset(&inpacket, 0, sizeof(inpacket)); + av_init_packet(&inpacket); + inpacket.data = NULL; + inpacket.size = 0; + + int ret = av_read_frame(inputCtx_, &inpacket); + if (ret == AVERROR(EAGAIN)) { + return Status::Success; + } else if (ret == AVERROR_EOF) { + return Status::EOFError; + } else if (ret < 0) { + char errbuf[64]; + av_strerror(ret, errbuf, sizeof(errbuf)); + SFL_ERR("Couldn't read frame: %s\n", errbuf); + return Status::ReadError; + } + + // is this a packet from the audio stream? + if (inpacket.stream_index != streamIndex_) + return Status::Success; + + int frameFinished = 0; + int len = avcodec_decode_audio4(decoderCtx_, decoded_frame, + &frameFinished, &inpacket); + if (len <= 0) { + return Status::DecodeError; + } + + if (frameFinished) { + if (emulateRate_) { + if (decoded_frame->pkt_dts != AV_NOPTS_VALUE) { + const auto now = std::chrono::system_clock::now(); + const std::chrono::duration<double> seconds = now - lastFrameClock_; + const double dTB = av_q2d(inputCtx_->streams[streamIndex_]->time_base); + const double dts_diff = dTB * (decoded_frame->pkt_dts - lastDts_); + const double usDelay = 1e6 * (dts_diff - seconds.count()); + if (usDelay > 0.0) { +#if LIBAVUTIL_VERSION_CHECK(51, 34, 0, 61, 100) + av_usleep(usDelay); +#else + usleep(usDelay); +#endif + } + lastFrameClock_ = now; + lastDts_ = decoded_frame->pkt_dts; + } + } + return Status::FrameFinished; + } + + return Status::Success; +} + VideoDecoder::Status VideoDecoder::flush(VideoFrame& result) { @@ -278,4 +414,32 @@ int VideoDecoder::getHeight() const int VideoDecoder::getPixelFormat() const { return libav_utils::sfl_pixel_format(decoderCtx_->pix_fmt); } +void VideoDecoder::writeToRingBuffer(AVFrame* decoded_frame, + sfl::RingBuffer& rb, + const sfl::AudioFormat outFormat) +{ + const sfl::AudioFormat decoderFormat = { + (unsigned) decoded_frame->sample_rate, + (unsigned) decoderCtx_->channels + }; + + sfl::AudioBuffer out(decoded_frame->nb_samples, decoderFormat); 
+ + out.deinterleave(reinterpret_cast<const SFLAudioSample*>(decoded_frame->data[0]), + decoded_frame->nb_samples, decoderCtx_->channels); + if ((unsigned)decoded_frame->sample_rate != outFormat.sample_rate) { + if (!resampler_) { + SFL_DBG("Creating audio resampler"); + resampler_.reset(new sfl::Resampler(outFormat)); + } + sfl::AudioBuffer resampledData(decoded_frame->nb_samples, + {(unsigned) outFormat.sample_rate, + (unsigned) decoderCtx_->channels}); + resampler_->resample(out, resampledData); + rb.put(resampledData); + } else { + rb.put(out); + } +} + } diff --git a/daemon/src/video/video_decoder.h b/daemon/src/video/video_decoder.h index 69e092733a7978932e827960ebe0f771d1999d49..6b2ad115781415a79375f2b772ab0741650bc593 100644 --- a/daemon/src/video/video_decoder.h +++ b/daemon/src/video/video_decoder.h @@ -38,6 +38,14 @@ #include <map> #include <string> +#include <memory> + +namespace sfl { + class AudioBuffer; + class AudioFormat; + class RingBuffer; + class Resampler; +} class AVCodecContext; class AVStream; @@ -65,7 +73,11 @@ namespace sfl_video { int openInput(const std::string &source_str, const std::string &format_str); int setupFromVideoData(); + int setupFromAudioData(); Status decode(VideoFrame&, VideoPacket&); + Status decode_audio(AVFrame* frame); + void writeToRingBuffer(AVFrame* frame, sfl::RingBuffer& rb, + const sfl::AudioFormat outFormat); Status flush(VideoFrame&); int getWidth() const; @@ -80,6 +92,7 @@ namespace sfl_video { AVCodec *inputDecoder_ = nullptr; AVCodecContext *decoderCtx_ = nullptr; AVFormatContext *inputCtx_ = nullptr; + std::unique_ptr<sfl::Resampler> resampler_; int streamIndex_ = -1; bool emulateRate_ = false; int64_t startTime_; diff --git a/daemon/src/video/video_encoder.cpp b/daemon/src/video/video_encoder.cpp index 724c57ac476e3959a1aa9c927890047ddc47dbf7..08f79898b01aef29e2c6804068221f8e7df767e3 100644 --- a/daemon/src/video/video_encoder.cpp +++ b/daemon/src/video/video_encoder.cpp @@ -31,10 +31,12 @@ #include 
"libav_deps.h" #include "video_encoder.h" +#include "audio/audiobuffer.h" #include "logger.h" #include <iostream> #include <sstream> +#include <algorithm> namespace sfl_video { @@ -78,17 +80,28 @@ void VideoEncoder::setOptions(const std::map<std::string, std::string>& options) const char *value; value = extract(options, "width"); - if (!value) - throw VideoEncoderException("width option not set"); - av_dict_set(&options_, "width", value, 0); + if (value) + av_dict_set(&options_, "width", value, 0); value = extract(options, "height"); - if (!value) - throw VideoEncoderException("height option not set"); - av_dict_set(&options_, "height", value, 0); + if (value) + av_dict_set(&options_, "height", value, 0); value = extract(options, "bitrate") ? : ""; - av_dict_set(&options_, "bitrate", value, 0); + if (value) + av_dict_set(&options_, "bitrate", value, 0); + + value = extract(options, "sample_rate") ? : ""; + if (value) + av_dict_set(&options_, "sample_rate", value, 0); + + value = extract(options, "channels") ? : ""; + if (value) + av_dict_set(&options_, "channels", value, 0); + + value = extract(options, "frame_size") ? 
: ""; + if (value) + av_dict_set(&options_, "frame_size", value, 0); value = extract(options, "framerate"); if (value) @@ -105,7 +118,7 @@ void VideoEncoder::setOptions(const std::map<std::string, std::string>& options) void VideoEncoder::openOutput(const char *enc_name, const char *short_name, - const char *filename, const char *mime_type) + const char *filename, const char *mime_type, bool is_video) { AVOutputFormat *oformat = av_guess_format(short_name, filename, mime_type); @@ -126,7 +139,7 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name, throw VideoEncoderException("No output encoder"); } - prepareEncoderContext(); + prepareEncoderContext(is_video); /* let x264 preset override our encoder settings */ if (!strcmp(enc_name, "libx264")) { @@ -161,26 +174,28 @@ VideoEncoder::openOutput(const char *enc_name, const char *short_name, stream_->codec = encoderCtx_; - // allocate buffers for both scaled (pre-encoder) and encoded frames - const int width = encoderCtx_->width; - const int height = encoderCtx_->height; - const int format = libav_utils::sfl_pixel_format((int)encoderCtx_->pix_fmt); - scaledFrameBufferSize_ = scaledFrame_.getSize(width, height, format); - if (scaledFrameBufferSize_ <= FF_MIN_BUFFER_SIZE) - throw VideoEncoderException("buffer too small"); + if (is_video) { + // allocate buffers for both scaled (pre-encoder) and encoded frames + const int width = encoderCtx_->width; + const int height = encoderCtx_->height; + const int format = libav_utils::sfl_pixel_format((int)encoderCtx_->pix_fmt); + scaledFrameBufferSize_ = scaledFrame_.getSize(width, height, format); + if (scaledFrameBufferSize_ <= FF_MIN_BUFFER_SIZE) + throw VideoEncoderException("buffer too small"); #if (LIBAVCODEC_VERSION_MAJOR < 54) - encoderBufferSize_ = scaledFrameBufferSize_; // seems to be ok - encoderBuffer_ = (uint8_t*) av_malloc(encoderBufferSize_); - if (!encoderBuffer_) - throw VideoEncoderException("Could not allocate encoder buffer"); + 
encoderBufferSize_ = scaledFrameBufferSize_; // seems to be ok + encoderBuffer_ = (uint8_t*) av_malloc(encoderBufferSize_); + if (!encoderBuffer_) + throw VideoEncoderException("Could not allocate encoder buffer"); #endif - scaledFrameBuffer_ = (uint8_t*) av_malloc(scaledFrameBufferSize_); - if (!scaledFrameBuffer_) - throw VideoEncoderException("Could not allocate scaled frame buffer"); + scaledFrameBuffer_ = (uint8_t*) av_malloc(scaledFrameBufferSize_); + if (!scaledFrameBuffer_) + throw VideoEncoderException("Could not allocate scaled frame buffer"); - scaledFrame_.setDestination(scaledFrameBuffer_, width, height, format); + scaledFrame_.setDestination(scaledFrameBuffer_, width, height, format); + } } void VideoEncoder::setInterruptCallback(int (*cb)(void*), void *opaque) @@ -306,6 +321,96 @@ int VideoEncoder::encode(VideoFrame &input, bool is_keyframe, int64_t frame_numb return ret; } +int VideoEncoder::encode_audio(const sfl::AudioBuffer &buffer) +{ + const int needed_bytes = av_samples_get_buffer_size(NULL, buffer.channels(), buffer.frames(), AV_SAMPLE_FMT_S16, 0); + if (needed_bytes < 0) { + SFL_ERR("Couldn't calculate buffer size"); + return -1; + } + + SFLAudioSample *sample_data = reinterpret_cast<SFLAudioSample*>(av_malloc(needed_bytes)); + if (!sample_data) + return -1; + + SFLAudioSample *offset_ptr = sample_data; + int nb_frames = buffer.frames(); + + buffer.interleave(sample_data); + const auto layout = buffer.channels() == 2 ? 
AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; + const auto sample_rate = buffer.getSampleRate(); + + while (nb_frames > 0) { + AVFrame *frame = avcodec_alloc_frame(); + if (!frame) { + av_freep(&sample_data); + return -1; + } + + if (encoderCtx_->frame_size) + frame->nb_samples = std::min<int>(nb_frames, encoderCtx_->frame_size); + else + frame->nb_samples = nb_frames; + + frame->format = AV_SAMPLE_FMT_S16; + frame->channel_layout = layout; + frame->sample_rate = sample_rate; + + const auto buffer_size = av_samples_get_buffer_size(NULL, buffer.channels(), frame->nb_samples, AV_SAMPLE_FMT_S16, 0); + + int err = avcodec_fill_audio_frame(frame, buffer.channels(), AV_SAMPLE_FMT_S16, + reinterpret_cast<const uint8_t *>(offset_ptr), buffer_size, 0); + if (err < 0) { + char errbuf[128]; + av_strerror(err, errbuf, sizeof(errbuf)); + SFL_ERR("Couldn't fill audio frame: %s: %d %d", errbuf, frame->nb_samples, buffer_size); + av_freep(&sample_data); + av_frame_free(&frame); + return -1; + } + nb_frames -= frame->nb_samples; + offset_ptr += frame->nb_samples * buffer.channels(); + + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = NULL; // packet data will be allocated by the encoder + pkt.size = 0; + + int got_packet; + int ret = avcodec_encode_audio2(encoderCtx_, &pkt, frame, &got_packet); + if (ret < 0) { + print_averror("avcodec_encode_audio2", ret); + av_free_packet(&pkt); + av_freep(&sample_data); + av_frame_free(&frame); + return ret; + } + + if (pkt.size and got_packet) { + if (pkt.pts != AV_NOPTS_VALUE) + pkt.pts = av_rescale_q(pkt.pts, encoderCtx_->time_base, stream_->time_base); + if (pkt.dts != AV_NOPTS_VALUE) + pkt.dts = av_rescale_q(pkt.dts, encoderCtx_->time_base, stream_->time_base); + + pkt.stream_index = stream_->index; + + // write the compressed frame + ret = av_write_frame(outputCtx_, &pkt); + if (ret < 0) + print_averror("av_write_frame", ret); + } + + av_free_packet(&pkt); + av_frame_free(&frame); + } + + //SFL_WARN("%d", *std::max_element(sample_data, 
sample_data + needed_bytes / 2)); + + av_freep(&sample_data); + + return 0; +} + int VideoEncoder::flush() { AVPacket pkt; @@ -370,7 +475,7 @@ void VideoEncoder::print_sdp(std::string &sdp_) SFL_DBG("Sending SDP: \n%s", sdp_.c_str()); } -void VideoEncoder::prepareEncoderContext() +void VideoEncoder::prepareEncoderContext(bool is_video) { #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 12, 0) encoderCtx_ = avcodec_alloc_context(); @@ -385,26 +490,61 @@ void VideoEncoder::prepareEncoderContext() NULL, 0)->value); SFL_DBG("Using bitrate %d", encoderCtx_->bit_rate); - // resolution must be a multiple of two - char *width = av_dict_get(options_, "width", NULL, 0)->value; - dstWidth_ = encoderCtx_->width = width ? atoi(width) : 0; - char *height = av_dict_get(options_, "height", NULL, 0)->value; - dstHeight_ = encoderCtx_->height = height ? atoi(height) : 0; - - const char *framerate = av_dict_get(options_, "framerate", - NULL, 0)->value; - const int DEFAULT_FPS = 30; - const int fps = framerate ? atoi(framerate) : DEFAULT_FPS; - encoderCtx_->time_base = (AVRational) {1, fps}; - // emit one intra frame every gop_size frames - encoderCtx_->max_b_frames = 0; - encoderCtx_->pix_fmt = PIXEL_FORMAT(YUV420P); // TODO: option me ! - - // Fri Jul 22 11:37:59 EDT 2011:tmatth:XXX: DON'T set this, we want our - // pps and sps to be sent in-band for RTP - // This is to place global headers in extradata instead of every - // keyframe. - // encoderCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER; + if (is_video) { + // resolution must be a multiple of two + char *width = av_dict_get(options_, "width", NULL, 0)->value; + dstWidth_ = encoderCtx_->width = width ? atoi(width) : 0; + char *height = av_dict_get(options_, "height", NULL, 0)->value; + dstHeight_ = encoderCtx_->height = height ? atoi(height) : 0; + + const char *framerate = av_dict_get(options_, "framerate", + NULL, 0)->value; + const int DEFAULT_FPS = 30; + const int fps = framerate ? 
atoi(framerate) : DEFAULT_FPS; + encoderCtx_->time_base = (AVRational) {1, fps}; + // emit one intra frame every gop_size frames + encoderCtx_->max_b_frames = 0; + encoderCtx_->pix_fmt = PIXEL_FORMAT(YUV420P); // TODO: option me ! + + // Fri Jul 22 11:37:59 EDT 2011:tmatth:XXX: DON'T set this, we want our + // pps and sps to be sent in-band for RTP + // This is to place global headers in extradata instead of every + // keyframe. + // encoderCtx_->flags |= CODEC_FLAG_GLOBAL_HEADER; + + } else { + encoderCtx_->sample_fmt = AV_SAMPLE_FMT_S16; + auto v = av_dict_get(options_, "sample_rate", NULL, 0); + if (v) { + encoderCtx_->sample_rate = atoi(v->value); + } else { + SFL_WARN("No sample rate set"); + encoderCtx_->sample_rate = 8000; + } + + v = av_dict_get(options_, "channels", NULL, 0); + if (v) { + auto c = std::atoi(v->value); + if (c > 2 or c < 1) { + SFL_WARN("Clamping invalid channel value %d", c); + c = 1; + } + encoderCtx_->channels = c; + } else { + SFL_WARN("Channels not set"); + encoderCtx_->channels = 1; + } + + encoderCtx_->channel_layout = encoderCtx_->channels == 2 ? 
AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; + + v = av_dict_get(options_, "frame_size", NULL, 0); + if (v) { + encoderCtx_->frame_size = atoi(v->value); + SFL_WARN("Frame size %d", encoderCtx_->frame_size); + } else { + SFL_WARN("Frame size not set"); + } + } } void VideoEncoder::forcePresetX264() diff --git a/daemon/src/video/video_encoder.h b/daemon/src/video/video_encoder.h index 95c32d6fc78e34d01985799d6c90b6ec21d102de..92e0d58322a873b521927cfbd93c6101ef585e8f 100644 --- a/daemon/src/video/video_encoder.h +++ b/daemon/src/video/video_encoder.h @@ -44,6 +44,10 @@ class AVStream; class AVFormatContext; class AVCodec; +namespace sfl { + class AudioBuffer; +} + namespace sfl_video { class VideoEncoderException : public std::runtime_error { @@ -61,9 +65,10 @@ public: void setInterruptCallback(int (*cb)(void*), void *opaque); void setIOContext(const std::unique_ptr<VideoIOHandle> &ioctx); void openOutput(const char *enc_name, const char *short_name, - const char *filename, const char *mime_type); + const char *filename, const char *mime_type, bool is_video); void startIO(); int encode(VideoFrame &input, bool is_keyframe, int64_t frame_number); + int encode_audio(const sfl::AudioBuffer &input); int flush(); void print_sdp(std::string &sdp_); @@ -76,7 +81,7 @@ public: private: NON_COPYABLE(VideoEncoder); void setScaleDest(void *data, int width, int height, int pix_fmt); - void prepareEncoderContext(); + void prepareEncoderContext(bool is_video); void forcePresetX264(); void extractProfileLevelID(const std::string ¶meters, AVCodecContext *ctx); diff --git a/daemon/src/video/video_sender.cpp b/daemon/src/video/video_sender.cpp index ee4387ab74d18f9bdfeb1a09ea9613b5a9f45ac5..c109bd6cc97d856669802e0af40565983aff0338 100644 --- a/daemon/src/video/video_sender.cpp +++ b/daemon/src/video/video_sender.cpp @@ -54,7 +54,7 @@ VideoSender::VideoSender(std::map<string, string> args, /* Encoder setup (may throw VideoEncoderException) */ videoEncoder_->setOptions(args); - 
videoEncoder_->openOutput(enc_name, "rtp", dest, NULL); + videoEncoder_->openOutput(enc_name, "rtp", dest, NULL, true); videoEncoder_->setIOContext(muxContext_); videoEncoder_->startIO();