/*
 *  Copyright (C) 2013-2019 Savoir-faire Linux Inc.
 *
 *  Author: Guillaume Roguez <Guillaume.Roguez@savoirfairelinux.com>
 *  Author: Eloi Bail <Eloi.Bail@savoirfairelinux.com>
 *  Author: Philippe Gorley <philippe.gorley@savoirfairelinux.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA.
 */

23
#include "libav_deps.h" // MUST BE INCLUDED FIRST
Guillaume Roguez's avatar
Guillaume Roguez committed
24
#include "media_codec.h"
25
#include "media_encoder.h"
26
#include "media_buffer.h"
Guillaume Roguez's avatar
Guillaume Roguez committed
27

28
#include "client/ring_signal.h"
29
#include "fileutils.h"
Tristan Matthews's avatar
Tristan Matthews committed
30
#include "logger.h"
31 32
#include "manager.h"
#include "string_utils.h"
33
#include "system_codec_container.h"
34 35 36 37

#ifdef RING_ACCEL
#include "video/accel.h"
#endif
38

39 40 41 42
extern "C" {
#include <libavutil/parseutils.h>
}

43 44
#include <algorithm>
#include <fstream>
45
#include <iostream>
46
#include <json/json.h>
47
#include <sstream>
48
#include <thread> // hardware_concurrency
49

50 51
// Define following line if you need to debug libav SDP
//#define DEBUG_SDP 1
52

53
namespace ring {
54

55 56
MediaEncoder::MediaEncoder()
    : outputCtx_(avformat_alloc_context())
57
{}
58

59
// Writes the trailer when the output context has private data, releases every
// encoder context, then frees the output context and the option dictionary.
MediaEncoder::~MediaEncoder()
{
    if (outputCtx_ != nullptr) {
        // NOTE(review): priv_data is presumably only set once a muxer is
        // attached to the context -- confirm before relying on it.
        if (outputCtx_->priv_data != nullptr)
            av_write_trailer(outputCtx_);

        for (auto ctx : encoders_) {
            if (ctx == nullptr)
                continue;
#ifndef _MSC_VER
            avcodec_free_context(&ctx);
#else
            avcodec_close(ctx);
#endif
        }
        avformat_free_context(outputCtx_);
    }
    av_dict_free(&options_);
}

78
void
79
MediaEncoder::setOptions(const MediaStream& opts)
80
{
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
    if (!opts.isValid()) {
        RING_ERR() << "Invalid options";
        return;
    }

    if (opts.isVideo) {
        videoOpts_ = opts;
        // Make sure width and height are even (required by x264)
        // This is especially for image/gif streaming, as video files and cameras usually have even resolutions
        videoOpts_.width -= videoOpts_.width % 2;
        videoOpts_.height -= videoOpts_.height % 2;
        if (not videoOpts_.frameRate)
            videoOpts_.frameRate = 30;
    } else {
        audioOpts_ = opts;
    }
97 98
}

99 100
void
MediaEncoder::setOptions(const MediaDescription& args)
101
{
102 103 104
    libav_utils::setDictValue(&options_, "payload_type", ring::to_string(args.payload_type));
    libav_utils::setDictValue(&options_, "max_rate", ring::to_string(args.codec->bitrate));
    libav_utils::setDictValue(&options_, "crf", ring::to_string(args.codec->quality));
105

Guillaume Roguez's avatar
Guillaume Roguez committed
106
    if (not args.parameters.empty())
107
        libav_utils::setDictValue(&options_, "parameters", args.parameters);
108 109
}

110
void
111
MediaEncoder::setMetadata(const std::string& title, const std::string& description)
112
{
113 114 115 116
    if (not title.empty())
        libav_utils::setDictValue(&outputCtx_->metadata, "title", title);
    if (not description.empty())
        libav_utils::setDictValue(&outputCtx_->metadata, "description", description);
117 118
}

119 120 121 122 123
void
MediaEncoder::setInitSeqVal(uint16_t seqVal)
{
    //only set not default value (!=0)
    if (seqVal != 0)
124
        av_opt_set_int(outputCtx_, "seq", seqVal, AV_OPT_SEARCH_CHILDREN);
125 126 127 128 129
}

// Reads the "seq" option back from the output context.
// Returns 0 when the option cannot be read.
uint16_t
MediaEncoder::getLastSeqValue()
{
    int64_t seq = 0;
    const bool found = av_opt_get_int(outputCtx_, "seq", AV_OPT_SEARCH_CHILDREN, &seq) >= 0;
    return found ? static_cast<uint16_t>(seq) : 0;
}

Gasuleg's avatar
Gasuleg committed
137 138 139
std::string
MediaEncoder::getEncoderName() const
{
140 141 142 143
    if (videoOpts_.isValid())
        return videoCodec_;
    else
        return audioCodec_;
Gasuleg's avatar
Gasuleg committed
144 145
}

146
void
147
MediaEncoder::openOutput(const std::string& filename, const std::string& format)
148 149
{
    avformat_free_context(outputCtx_);
150 151 152 153
    if (format.empty())
        avformat_alloc_output_context2(&outputCtx_, nullptr, nullptr, filename.c_str());
    else
        avformat_alloc_output_context2(&outputCtx_, nullptr, format.c_str(), filename.c_str());
154 155 156 157

#ifdef RING_ACCEL
    enableAccel_ = Manager::instance().videoPreferences.getEncodingAccelerated();
#endif
158 159 160
}

int
161
MediaEncoder::addStream(const SystemCodecInfo& systemCodecInfo)
162 163 164
{
    if (systemCodecInfo.mediaType == MEDIA_AUDIO) {
        audioCodec_ = systemCodecInfo.name;
165
        return initStream(systemCodecInfo, nullptr);
166 167 168 169 170 171 172 173 174 175 176
    } else {
        videoCodec_ = systemCodecInfo.name;
        // TODO only support 1 audio stream and 1 video stream per encoder
        if (audioOpts_.isValid())
            return 1; // stream will be added to AVFormatContext after audio stream
        else
            return 0; // only a video stream
    }
}

int
177
MediaEncoder::initStream(const std::string& codecName, AVBufferRef* framesCtx)
178 179 180
{
    const auto codecInfo = getSystemCodecContainer()->searchCodecByName(codecName, MEDIA_ALL);
    if (codecInfo)
181
        return initStream(*codecInfo, framesCtx);
182 183 184 185 186
    else
        return -1;
}

int
187
MediaEncoder::initStream(const SystemCodecInfo& systemCodecInfo, AVBufferRef* framesCtx)
188 189 190
{
    AVCodec* outputCodec = nullptr;
    AVCodecContext* encoderCtx = nullptr;
191 192 193 194
#ifdef RING_ACCEL
    if (systemCodecInfo.mediaType == MEDIA_VIDEO) {
        if (enableAccel_) {
            if (accel_ = video::HardwareAccel::setupEncoder(
195 196
                static_cast<AVCodecID>(systemCodecInfo.avcodecId),
                videoOpts_.width, videoOpts_.height, framesCtx)) {
197 198 199 200 201 202 203 204
                outputCodec = avcodec_find_encoder_by_name(accel_->getCodecName().c_str());
            }
        } else {
            RING_WARN() << "Hardware encoding disabled";
        }
    }
#endif

205
    if (!outputCodec) {
206 207 208 209 210 211 212 213 214 215 216 217
        /* find the video encoder */
        if (systemCodecInfo.avcodecId == AV_CODEC_ID_H263)
            // For H263 encoding, we force the use of AV_CODEC_ID_H263P (H263-1998)
            // H263-1998 can manage all frame sizes while H263 don't
            // AV_CODEC_ID_H263 decoder will be used for decoding
            outputCodec = avcodec_find_encoder(AV_CODEC_ID_H263P);
        else
            outputCodec = avcodec_find_encoder(static_cast<AVCodecID>(systemCodecInfo.avcodecId));
        if (!outputCodec) {
            RING_ERR("Encoder \"%s\" not found!", systemCodecInfo.name.c_str());
            throw MediaEncoderException("No output encoder");
        }
218 219
    }

220 221
    encoderCtx = prepareEncoderContext(outputCodec, systemCodecInfo.mediaType == MEDIA_VIDEO);
    encoders_.push_back(encoderCtx);
222 223 224

#ifdef RING_ACCEL
    if (accel_) {
225
        accel_->setDetails(encoderCtx);
226 227 228 229
        encoderCtx->opaque = accel_.get();
    }
#endif

230
    auto maxBitrate = 1000 * std::atoi(libav_utils::getDictValue(options_, "max_rate"));
231
    auto bufSize = 2 * maxBitrate; // as recommended (TODO: make it customizable)
232
    auto crf = std::atoi(libav_utils::getDictValue(options_, "crf"));
233

234
    /* let x264 preset override our encoder settings */
235
    if (systemCodecInfo.avcodecId == AV_CODEC_ID_H264) {
236 237
        auto profileLevelId = libav_utils::getDictValue(options_, "parameters");
        extractProfileLevelID(profileLevelId, encoderCtx);
238 239 240 241 242 243
#ifdef RING_ACCEL
        if (accel_)
            // limit the bitrate else it will easily go up to a few MiB/s
            encoderCtx->bit_rate = maxBitrate;
        else
#endif
244
        forcePresetX264(encoderCtx);
245
        // For H264 :
246 247 248 249 250
        // Streaming => VBV (constrained encoding) + CRF (Constant Rate Factor)
        if (crf == SystemCodecInfo::DEFAULT_NO_QUALITY)
            crf = 30; // good value for H264-720p@30
        RING_DBG("H264 encoder setup: crf=%u, maxrate=%u, bufsize=%u", crf, maxBitrate, bufSize);

251
        av_opt_set_int(encoderCtx, "crf", crf, AV_OPT_SEARCH_CHILDREN);
252 253 254
        encoderCtx->rc_buffer_size = bufSize;
        encoderCtx->rc_max_rate = maxBitrate;
    } else if (systemCodecInfo.avcodecId == AV_CODEC_ID_VP8) {
255 256 257 258
        // For VP8 :
        // 1- if quality is set use it
        // bitrate need to be set. The target bitrate becomes the maximum allowed bitrate
        // 2- otherwise set rc_max_rate and rc_buffer_size
259 260
        // Using information given on this page:
        // http://www.webmproject.org/docs/encoder-parameters/
261 262 263 264
        av_opt_set(encoderCtx, "quality", "realtime", AV_OPT_SEARCH_CHILDREN);
        av_opt_set_int(encoderCtx, "error-resilient", 1, AV_OPT_SEARCH_CHILDREN);
        av_opt_set_int(encoderCtx, "cpu-used", 7, AV_OPT_SEARCH_CHILDREN); // value obtained from testing
        av_opt_set_int(encoderCtx, "lag-in-frames", 0, AV_OPT_SEARCH_CHILDREN);
265 266
        // allow encoder to drop frames if buffers are full and
        // to undershoot target bitrate to lessen strain on resources
267 268
        av_opt_set_int(encoderCtx, "drop-frame", 25, AV_OPT_SEARCH_CHILDREN);
        av_opt_set_int(encoderCtx, "undershoot-pct", 95, AV_OPT_SEARCH_CHILDREN);
269 270 271 272 273 274
        // don't set encoderCtx->gop_size: let libvpx decide when to insert a keyframe
        encoderCtx->slices = 2; // VP8E_SET_TOKEN_PARTITIONS
        encoderCtx->qmin = 4;
        encoderCtx->qmax = 56;
        encoderCtx->rc_buffer_size = maxBitrate;
        encoderCtx->bit_rate = maxBitrate;
275
        if (crf != SystemCodecInfo::DEFAULT_NO_QUALITY) {
276
            av_opt_set_int(encoderCtx, "crf", crf, AV_OPT_SEARCH_CHILDREN);
277 278 279 280
            RING_DBG("Using quality factor %d", crf);
        } else {
            RING_DBG("Using Max bitrate %d", maxBitrate);
        }
281
    } else if (systemCodecInfo.avcodecId == AV_CODEC_ID_MPEG4) {
282 283 284
        // For MPEG4 :
        // No CRF avaiable.
        // Use CBR (set bitrate)
285 286
        encoderCtx->rc_buffer_size = maxBitrate;
        encoderCtx->bit_rate = encoderCtx->rc_min_rate = encoderCtx->rc_max_rate =  maxBitrate;
287
        RING_DBG("Using Max bitrate %d", maxBitrate);
288 289 290
    } else if (systemCodecInfo.avcodecId == AV_CODEC_ID_H263) {
        encoderCtx->bit_rate = encoderCtx->rc_max_rate =  maxBitrate;
        encoderCtx->rc_buffer_size = maxBitrate;
Éloi Bail's avatar
Éloi Bail committed
291
        RING_DBG("Using Max bitrate %d", maxBitrate);
292 293 294
    }

    // add video stream to outputformat context
295 296
    AVStream* stream = avformat_new_stream(outputCtx_, outputCodec);
    if (!stream)
297
        throw MediaEncoderException("Could not allocate stream");
298

299 300
    currentStreamIdx_ = stream->index;

301
    readConfig(&options_, encoderCtx);
302
    if (avcodec_open2(encoderCtx, outputCodec, &options_) < 0)
303 304
        throw MediaEncoderException("Could not open encoder");

305
#ifndef _WIN32
306
    avcodec_parameters_from_context(stream->codecpar, encoderCtx);
307
#else
308
    stream->codec = encoderCtx;
309
#endif
310 311
    // framerate is not copied from encoderCtx to stream
    stream->avg_frame_rate = encoderCtx->framerate;
312
#ifdef RING_VIDEO
313
    if (systemCodecInfo.mediaType == MEDIA_VIDEO) {
314
        // allocate buffers for both scaled (pre-encoder) and encoded frames
315 316
        const int width = encoderCtx->width;
        const int height = encoderCtx->height;
317 318 319 320 321 322 323 324 325
        int format = encoderCtx->pix_fmt;
#ifdef RING_ACCEL
        if (accel_) {
            // hardware encoders require a specific pixel format
            auto desc = av_pix_fmt_desc_get(encoderCtx->pix_fmt);
            if (desc && (desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
                format = accel_->getSoftwareFormat();
        }
#endif
326
        scaledFrameBufferSize_ = videoFrameSize(format, width, height);
327 328 329
        if (scaledFrameBufferSize_ < 0)
            throw MediaEncoderException(("Could not compute buffer size: " + libav_utils::getError(scaledFrameBufferSize_)).c_str());
        else if (scaledFrameBufferSize_ <= AV_INPUT_BUFFER_MIN_SIZE)
330
            throw MediaEncoderException("buffer too small");
331

332 333
        scaledFrameBuffer_.reserve(scaledFrameBufferSize_);
        scaledFrame_.setFromMemory(scaledFrameBuffer_.data(), format, width, height);
334
    }
335
#endif // RING_VIDEO
336 337

    return stream->index;
338 339
}

340
void
341
MediaEncoder::openIOContext()
342
{
343 344
    if (ioCtx_) {
        outputCtx_->pb = ioCtx_;
345 346 347 348 349 350 351 352 353 354
        outputCtx_->packet_size = outputCtx_->pb->buffer_size;
    } else {
        int ret = 0;
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(58, 7, 100)
        const char* filename = outputCtx_->url;
#else
        const char* filename = outputCtx_->filename;
#endif
        if (!(outputCtx_->oformat->flags & AVFMT_NOFILE)) {
            if ((ret = avio_open(&outputCtx_->pb, filename, AVIO_FLAG_WRITE)) < 0) {
355
                std::stringstream ss;
356
                ss << "Could not open IO context for '" << filename << "': " << libav_utils::getError(ret);
357
                throw MediaEncoderException(ss.str().c_str());
358 359 360
            }
        }
    }
361 362
}

363
void
364
MediaEncoder::startIO()
365
{
366 367
    if (!outputCtx_->pb)
        openIOContext();
368
    if (avformat_write_header(outputCtx_, options_ ? &options_ : nullptr)) {
Adrien Béraud's avatar
Adrien Béraud committed
369
        RING_ERR("Could not write header for output file... check codec parameters");
370
        throw MediaEncoderException("Failed to write output file header");
371 372
    }

373 374 375
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(58, 7, 100)
    av_dump_format(outputCtx_, 0, outputCtx_->url, 1);
#else
376
    av_dump_format(outputCtx_, 0, outputCtx_->filename, 1);
377
#endif
378
    initialized_ = true;
379 380
}

381
#ifdef RING_VIDEO
Guillaume Roguez's avatar
Guillaume Roguez committed
382 383 384
// Encodes one video frame.
// On first use the video stream is initialized and the output is started.
// The input is then routed to the encoder: used directly, transferred
// between GPU and main memory, converted, or scaled in software.
//
// input: frame to encode.
// is_keyframe: request an intra picture for this frame.
// frame_number: used as the presentation timestamp of the frame.
// Returns the result of encode(AVFrame*, int), or -1 when the video stream
// could not be initialized.
int
MediaEncoder::encode(VideoFrame& input, bool is_keyframe,
                     int64_t frame_number)
{
    if (!initialized_) {
        // A negative index means no stream was created (e.g. unknown codec);
        // going on would encode with whichever stream is current.
        if (initStream(videoCodec_, input.pointer()->hw_frames_ctx) < 0) {
            RING_ERR() << "Could not initialize video stream for encoder '" << videoCodec_ << "'";
            return -1;
        }
        startIO();
    }

    AVFrame* frame;
#ifdef RING_ACCEL
    auto desc = av_pix_fmt_desc_get(static_cast<AVPixelFormat>(input.format()));
    bool isHardware = desc && (desc->flags & AV_PIX_FMT_FLAG_HWACCEL);
    std::unique_ptr<VideoFrame> framePtr;
    if (accel_ && accel_->isLinked()) {
        // Fully accelerated pipeline, skip main memory
        frame = input.pointer();
    } else if (isHardware) {
        // Hardware decoded frame, transfer back to main memory
        // Transfer to GPU if we have a hardware encoder
        AVPixelFormat pix = (accel_ ? accel_->getSoftwareFormat() : AV_PIX_FMT_YUV420P);
        framePtr = video::HardwareAccel::transferToMainMemory(input, pix);
        if (accel_)
            framePtr = accel_->transfer(*framePtr);
        frame = framePtr->pointer();
    } else if (accel_) {
        // Software decoded frame with a hardware encoder, convert to accepted format first
        auto pix = accel_->getSoftwareFormat();
        if (input.format() != pix) {
            framePtr = scaler_.convertFormat(input, pix);
            framePtr = accel_->transfer(*framePtr);
        } else {
            framePtr = accel_->transfer(input);
        }
        frame = framePtr->pointer();
    } else {
#endif
        // Software path: scale into the pre-allocated frame, keeping the
        // aspect ratio; the frame is blacked out first.
        libav_utils::fillWithBlack(scaledFrame_.pointer());
        scaler_.scale_with_aspect(input, scaledFrame_);
        frame = scaledFrame_.pointer();
#ifdef RING_ACCEL
    }
#endif

    AVCodecContext* enc = encoders_[currentStreamIdx_];
    frame->pts = frame_number;
    // Rescale the frame number when the time base is not the inverse of the
    // frame rate.
    if (enc->framerate.num != enc->time_base.den || enc->framerate.den != enc->time_base.num)
        frame->pts /= (rational<int64_t>(enc->framerate) * rational<int64_t>(enc->time_base)).real<int64_t>();

    if (is_keyframe) {
        frame->pict_type = AV_PICTURE_TYPE_I;
        frame->key_frame = 1;
    } else {
        frame->pict_type = AV_PICTURE_TYPE_NONE;
        frame->key_frame = 0;
    }

    return encode(frame, currentStreamIdx_);
}
441
#endif // RING_VIDEO
442

443 444
// Encodes one audio frame, stamping it with the running sample count.
// While the encoder is not initialized and a video stream is expected, audio
// is dropped until the first video frame arrives. Always returns 0.
int
MediaEncoder::encodeAudio(AudioFrame& frame)
{
    if (!initialized_) {
        // Initialize on first video frame, or first audio frame if no video stream
        if (videoOpts_.isValid())
            return 0;
        startIO();
    }

    auto raw = frame.pointer();
    raw->pts = sent_samples;
    sent_samples += raw->nb_samples;
    encode(raw, currentStreamIdx_);
    return 0;
}

459 460
// Sends a raw frame to the encoder of stream `streamIdx` and writes the
// resulting packet to the output.
//
// frame: frame to encode, or nullptr to flush the encoder.
// streamIdx: index of the stream/encoder to use.
// Returns 0 on success or when the frame is dropped, -1 when the stream index
// is invalid or the frame is rejected by the encoder, or the libav error
// reported while receiving a packet.
int
MediaEncoder::encode(AVFrame* frame, int streamIdx)
{
    if (!initialized_) {
        // A null frame is a flush request; nothing was encoded yet, so there
        // is nothing to drain (and no frame to inspect).
        if (!frame)
            return 0;

        // Initialize on first video frame, or first audio frame if no video stream
        const bool isVideo = (frame->width > 0 && frame->height > 0);
        if (isVideo) {
            streamIdx = initStream(videoCodec_, frame->hw_frames_ctx);
            startIO();
        } else if (not videoOpts_.isValid()) {
            // Audio only: the audio stream already exists (see addStream),
            // only the output needs to be started.
            startIO();
        } else {
            return 0;
        }
    }

    if (streamIdx < 0 or static_cast<size_t>(streamIdx) >= encoders_.size()) {
        RING_ERR() << "Invalid stream index " << streamIdx;
        return -1;
    }

    int ret = 0;
    AVCodecContext* encoderCtx = encoders_[streamIdx];
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = nullptr; // packet data will be allocated by the encoder
    pkt.size = 0;

    ret = avcodec_send_frame(encoderCtx, frame);
    if (ret < 0)
        return -1;

    while (ret >= 0) {
        ret = avcodec_receive_packet(encoderCtx, &pkt);
        if (ret == AVERROR(EAGAIN))
            break;
        if (ret < 0 && ret != AVERROR_EOF) { // we still want to write our frame on EOF
            RING_ERR() << "Failed to encode frame: " << libav_utils::getError(ret);
            return ret;
        }

        if (pkt.size) {
            // send() returns true on success: stop after the first packet
            // that was written.
            if (send(pkt, streamIdx))
                break;
        }
    }

    av_packet_unref(&pkt);
    return 0;
}
501

502
bool
Philippe Gorley's avatar
Philippe Gorley committed
503
MediaEncoder::send(AVPacket& pkt, int streamIdx)
504
{
505 506 507 508
    if (!initialized_) {
        streamIdx = initStream(videoCodec_, nullptr);
        startIO();
    }
Philippe Gorley's avatar
Philippe Gorley committed
509 510
    if (streamIdx < 0)
        streamIdx = currentStreamIdx_;
511 512 513 514 515 516 517 518 519 520
    if (streamIdx >= 0 and streamIdx < encoders_.size()) {
        auto encoderCtx = encoders_[streamIdx];
        pkt.stream_index = streamIdx;
        if (pkt.pts != AV_NOPTS_VALUE)
            pkt.pts = av_rescale_q(pkt.pts, encoderCtx->time_base,
                                outputCtx_->streams[streamIdx]->time_base);
        if (pkt.dts != AV_NOPTS_VALUE)
            pkt.dts = av_rescale_q(pkt.dts, encoderCtx->time_base,
                                outputCtx_->streams[streamIdx]->time_base);
    }
521 522 523 524 525 526 527 528
    // write the compressed frame
    auto ret = av_write_frame(outputCtx_, &pkt);
    if (ret < 0) {
        RING_ERR() << "av_write_frame failed: " << libav_utils::getError(ret);
    }
    return ret >= 0;
}

529 530 531
int
MediaEncoder::flush()
{
532 533 534 535 536 537 538 539
    int ret = 0;
    for (size_t i = 0; i < outputCtx_->nb_streams; ++i) {
        if (encode(nullptr, i) < 0) {
            RING_ERR() << "Could not flush stream #" << i;
            ret |= 1u << i; // provide a way for caller to know which streams failed
        }
    }
    return -ret;
540 541
}

542 543
std::string
MediaEncoder::print_sdp()
544 545
{
    /* theora sdp can be huge */
546
#ifndef _WIN32
547
    const auto sdp_size = outputCtx_->streams[currentStreamIdx_]->codecpar->extradata_size + 2048;
548
#else
549
    const auto sdp_size = outputCtx_->streams[currentStreamIdx_]->codec->extradata_size + 2048;
550
#endif
551
    std::string result;
Guillaume Roguez's avatar
Guillaume Roguez committed
552
    std::string sdp(sdp_size, '\0');
553 554
    av_sdp_create(&outputCtx_, 1, &(*sdp.begin()), sdp_size);
    std::istringstream iss(sdp);
555
    std::string line;
556 557 558
    while (std::getline(iss, line)) {
        /* strip windows line ending */
        line = line.substr(0, line.length() - 1);
559
        result += line + "\n";
560
    }
561 562 563 564
#ifdef DEBUG_SDP
    RING_DBG("Sending SDP:\n%s", result.c_str());
#endif
    return result;
565 566
}

567 568
// Allocates a codec context for `outputCodec` and fills in the settings
// taken from the stored video or audio options.
//
// outputCodec: encoder to allocate the context for (must not be null).
// is_video: selects the video settings, otherwise the audio settings.
// Returns the new context; ownership passes to the caller.
// Throws MediaEncoderException when the context cannot be allocated.
AVCodecContext*
MediaEncoder::prepareEncoderContext(AVCodec* outputCodec, bool is_video)
{
    AVCodecContext* encoderCtx = avcodec_alloc_context3(outputCodec);
    if (!encoderCtx)
        throw MediaEncoderException("Failed to allocate encoder context");

    auto encoderName = outputCodec->name; // guaranteed to be non null if AVCodec is not null

    // Cap the thread count: 16 for video, 4 for audio.
    encoderCtx->thread_count = std::min(std::thread::hardware_concurrency(), is_video ? 16u : 4u);
    RING_DBG("[%s] Using %d threads", encoderName, encoderCtx->thread_count);

    if (is_video) {
        // resolution must be a multiple of two
        encoderCtx->width = videoOpts_.width;
        encoderCtx->height = videoOpts_.height;

        // satisfy ffmpeg: denominator must be 16bit or less value
        // time base = 1/FPS
        av_reduce(&encoderCtx->framerate.num, &encoderCtx->framerate.den,
                  videoOpts_.frameRate.numerator(), videoOpts_.frameRate.denominator(),
                  (1U << 16) - 1);
        encoderCtx->time_base = av_inv_q(encoderCtx->framerate);

        // no B-frames
        encoderCtx->max_b_frames = 0;
        encoderCtx->pix_fmt = AV_PIX_FMT_YUV420P;
#ifdef RING_ACCEL
        if (accel_)
            encoderCtx->pix_fmt = accel_->getFormat();
#endif

        // Fri Jul 22 11:37:59 EDT 2011:tmatth:XXX: DON'T set this, we want our
        // pps and sps to be sent in-band for RTP
        // This is to place global headers in extradata instead of every
        // keyframe.
        // encoderCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    } else {
        encoderCtx->sample_fmt = AV_SAMPLE_FMT_S16;
        encoderCtx->sample_rate = std::max(8000, audioOpts_.sampleRate);
        encoderCtx->time_base = AVRational{1, encoderCtx->sample_rate};
        if (audioOpts_.nbChannels > 2 || audioOpts_.nbChannels < 1) {
            // Clamp to [1, 2]: fewer than 1 becomes mono, more than 2 stereo.
            encoderCtx->channels = std::max(1, std::min(audioOpts_.nbChannels, 2));
            RING_ERR() << "[" << encoderName << "] Clamping invalid channel count: "
                << audioOpts_.nbChannels << " -> " << encoderCtx->channels;
        } else {
            encoderCtx->channels = audioOpts_.nbChannels;
        }
        encoderCtx->channel_layout = av_get_default_channel_layout(encoderCtx->channels);
        if (audioOpts_.frameSize) {
            encoderCtx->frame_size = audioOpts_.frameSize;
            RING_DBG() << "[" << encoderName << "] Frame size " << encoderCtx->frame_size;
        } else {
            RING_WARN() << "[" << encoderName << "] Frame size not set";
        }
    }

    return encoderCtx;
}

625 626
// Applies the x264 "ultrafast" preset and "zerolatency" tune to the encoder
// context. A failure to set either option is logged as a warning.
void
MediaEncoder::forcePresetX264(AVCodecContext* encoderCtx)
{
    const char* const presetName = "ultrafast";
    const char* const tuneName = "zerolatency";

    const int presetRet = av_opt_set(encoderCtx, "preset", presetName, AV_OPT_SEARCH_CHILDREN);
    if (presetRet != 0)
        RING_WARN("Failed to set x264 preset '%s'", presetName);

    const int tuneRet = av_opt_set(encoderCtx, "tune", tuneName, AV_OPT_SEARCH_CHILDREN);
    if (tuneRet != 0)
        RING_WARN("Failed to set x264 tune '%s'", tuneName);
}

636 637
void
MediaEncoder::extractProfileLevelID(const std::string &parameters,
638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
                                         AVCodecContext *ctx)
{
    // From RFC3984:
    // If no profile-level-id is present, the Baseline Profile without
    // additional constraints at Level 1 MUST be implied.
    ctx->profile = FF_PROFILE_H264_BASELINE;
    ctx->level = 0x0d;
    // ctx->level = 0x0d; // => 13 aka 1.3
    if (parameters.empty())
        return;

    const std::string target("profile-level-id=");
    size_t needle = parameters.find(target);
    if (needle == std::string::npos)
        return;

    needle += target.length();
    const size_t id_length = 6; /* digits */
    const std::string profileLevelID(parameters.substr(needle, id_length));
    if (profileLevelID.length() != id_length)
        return;

    int result;
    std::stringstream ss;
    ss << profileLevelID;
    ss >> std::hex >> result;
    // profile-level id consists of three bytes
    const unsigned char profile_idc = result >> 16;             // 42xxxx -> 42
    const unsigned char profile_iop = ((result >> 8) & 0xff);   // xx80xx -> 80
    ctx->level = result & 0xff;                                 // xxxx0d -> 0d
    switch (profile_idc) {
669 670 671 672 673 674 675 676 677 678 679 680
        case FF_PROFILE_H264_BASELINE:
            // check constraint_set_1_flag
            if ((profile_iop & 0x40) >> 6)
                ctx->profile |= FF_PROFILE_H264_CONSTRAINED;
            break;
        case FF_PROFILE_H264_HIGH_10:
        case FF_PROFILE_H264_HIGH_422:
        case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
            // check constraint_set_3_flag
            if ((profile_iop & 0x10) >> 4)
                ctx->profile |= FF_PROFILE_H264_INTRA;
            break;
681
    }
Adrien Béraud's avatar
Adrien Béraud committed
682
    RING_DBG("Using profile %x and level %d", ctx->profile, ctx->level);
683
}
684

685 686 687
// Tells whether this encoder is set up with the given account codec, by
// comparing its name with the stored video or audio codec name.
bool
MediaEncoder::useCodec(const ring::AccountCodecInfo* codec) const noexcept
{
    const auto& info = codec->systemCodecInfo;
    const bool isVideo = (info.mediaType == MEDIA_VIDEO);
    const std::string& current = isVideo ? videoCodec_ : audioCodec_;
    return current == info.name;
}

694 695 696 697 698 699 700 701 702 703 704 705 706 707
#ifdef RING_ACCEL
// Turns hardware encoding on or off and emits HardwareEncodingChanged.
// When turned off, the accelerator is released and every encoder's opaque
// pointer is cleared.
void
MediaEncoder::enableAccel(bool enableAccel)
{
    enableAccel_ = enableAccel;
    emitSignal<DRing::ConfigurationSignal::HardwareEncodingChanged>(enableAccel_);
    if (enableAccel_)
        return;

    accel_.reset();
    for (auto ctx : encoders_)
        ctx->opaque = nullptr;
}
#endif

708 709 710 711 712 713 714 715 716
// Returns the number of streams of the output context, or 0 when there is no
// output context.
unsigned
MediaEncoder::getStreamCount() const
{
    return outputCtx_ ? outputCtx_->nb_streams : 0u;
}

717 718 719 720 721 722 723 724 725 726 727
// Describes the encoder of a stream as a MediaStream called `name`.
// A negative streamIdx selects the current stream. Returns a default
// constructed MediaStream when there is no such stream.
MediaStream
MediaEncoder::getStream(const std::string& name, int streamIdx) const
{
    const int idx = (streamIdx < 0) ? currentStreamIdx_ : streamIdx;

    // make sure the index is valid
    const bool hasStreams = getStreamCount() > 0;
    if (!hasStreams || idx < 0 || encoders_.size() < static_cast<unsigned>(idx + 1))
        return {};

    // TODO set firstTimestamp
    auto ms = MediaStream(name, encoders_[idx]);
#ifdef RING_ACCEL
    if (accel_)
        ms.format = accel_->getSoftwareFormat();
#endif
    return ms;
}

736
void
737
MediaEncoder::readConfig(AVDictionary** dict, AVCodecContext* encoderCtx)
738 739
{
    std::string path = fileutils::get_config_dir() + DIR_SEPARATOR_STR + "encoder.json";
740
    std::string name = encoderCtx->codec->name;
741 742 743 744 745 746 747 748 749
    if (fileutils::isFile(path)) {
        try {
            Json::Value root;
            std::ifstream file(path);
            file >> root;
            if (!root.isObject()) {
                RING_ERR() << "Invalid encoder configuration: root is not an object";
                return;
            }
750
            const auto& config = root[name];
751
            if (config.isNull()) {
752
                RING_WARN() << "Encoder '" << name << "' not found in configuration file";
753 754 755
                return;
            }
            if (!config.isObject()) {
756
                RING_ERR() << "Invalid encoder configuration: '" << name << "' is not an object";
757 758 759 760 761 762 763
                return;
            }
            // If users want to change these, they should use the settings page.
            for (Json::Value::const_iterator it = config.begin(); it != config.end(); ++it) {
                Json::Value v = *it;
                if (!it.key().isConvertibleTo(Json::ValueType::stringValue)
                    || !v.isConvertibleTo(Json::ValueType::stringValue)) {
764
                    RING_ERR() << "Invalid configuration for '" << name << "'";
765 766 767 768
                    return;
                }
                const auto& key = it.key().asString();
                const auto& value = v.asString();
769
                // provides a way to override all AVCodecContext fields MediaEncoder sets
770
                if (key == "parameters") // Used by MediaEncoder for profile-level-id, ignore
771
                    continue;
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
                else if (value.empty())
                    libav_utils::setDictValue(dict, key, nullptr);
                else if (key == "profile")
                    encoderCtx->profile = v.asInt();
                else if (key == "level")
                    encoderCtx->level = v.asInt();
                else if (key == "bit_rate")
                    encoderCtx->bit_rate = v.asInt();
                else if (key == "rc_buffer_size")
                    encoderCtx->rc_buffer_size = v.asInt();
                else if (key == "rc_min_rate")
                    encoderCtx->rc_min_rate = v.asInt();
                else if (key == "rc_max_rate")
                    encoderCtx->rc_max_rate = v.asInt();
                else if (key == "qmin")
                    encoderCtx->qmin = v.asInt();
                else if (key == "qmax")
                    encoderCtx->qmax = v.asInt();
790 791 792 793 794 795 796 797 798
                else
                    libav_utils::setDictValue(dict, key, value);
            }
        } catch (const Json::Exception& e) {
            RING_ERR() << "Failed to load encoder configuration file: " << e.what();
        }
    }
}

799
} // namespace ring