diff --git a/contrib/src/ffmpeg/rules.mak b/contrib/src/ffmpeg/rules.mak
index 7fbab1491e23e1490d94bf6548fb0f2540721b23..3b6ec3306ccb17ed853be844d10e3abc60fa8dd8 100644
--- a/contrib/src/ffmpeg/rules.mak
+++ b/contrib/src/ffmpeg/rules.mak
@@ -54,6 +54,7 @@ FFMPEGCONF += \
 	--enable-muxer=ogg \
 	--enable-muxer=pcm_s16be \
 	--enable-muxer=pcm_s16le \
+	--enable-muxer=wav \
 	--enable-demuxer=rtp \
 	--enable-demuxer=mjpeg \
 	--enable-demuxer=mjpeg_2000 \
@@ -164,7 +165,13 @@ FFMPEGCONF += \
 	--enable-decoder=pcm_s8 \
 	--enable-decoder=pcm_s8_planar \
 	--enable-decoder=pcm_u16be \
-	--enable-decoder=pcm_u16le
+	--enable-decoder=pcm_u16le \
+	--enable-encoder=pcm_u8 \
+	--enable-encoder=pcm_f32le \
+	--enable-encoder=pcm_f64le \
+	--enable-encoder=pcm_s16le \
+	--enable-encoder=pcm_s32le \
+	--enable-encoder=pcm_s64le
 
 #encoders/decoders for images
 FFMPEGCONF += \
diff --git a/src/debug_utils.h b/src/debug_utils.h
index 857279059960c6ac724f93ba7e0739b13c863611..3e519b7e7efbe54b53c22b9b765320d9a0e7a319 100644
--- a/src/debug_utils.h
+++ b/src/debug_utils.h
@@ -75,127 +75,177 @@ private:
 };
 
 /**
- * Minimally invasive audio logger. Writes a wav file from raw PCM or AVFrame. Helps debug what goes
- * wrong with audio.
+ * Audio logger. Writes a wav file from raw PCM or AVFrame. Helps debug what goes wrong with audio.
+ *
+ * Frames are encoded with the FFmpeg PCM encoder matching the sample format of the frame given
+ * to the constructor and muxed into a wav file; the destructor writes the trailer and closes
+ * the file. Not copyable, because it owns raw FFmpeg contexts.
  */
-class WavWriter
-{
+class WavWriter {
 public:
-    WavWriter(std::string filename, AVFrame* frame)
-        : format_(static_cast<AVSampleFormat>(frame->format))
-        , channels_(frame->channels)
-        , planar_(av_sample_fmt_is_planar(format_))
-        , depth_(av_get_bytes_per_sample(format_))
-        , stepPerSample_(planar_ ? depth_ : depth_ * channels_)
-    {
-        std::vector<AVSampleFormat> v {AV_SAMPLE_FMT_FLT,
-                                       AV_SAMPLE_FMT_FLTP,
-                                       AV_SAMPLE_FMT_DBL,
-                                       AV_SAMPLE_FMT_DBLP};
-        f_ = std::ofstream(filename, std::ios_base::out | std::ios_base::binary);
-        f_.imbue(std::locale::classic());
-        f_ << "RIFF----WAVEfmt ";
-        if (std::find(v.begin(), v.end(), format_) == v.end()) {
-            write(16, 4); // Chunk size
-            write(1, 2); // WAVE_FORMAT_PCM
-            write(frame->channels, 2);
-            write(frame->sample_rate, 4);
-            write(frame->sample_rate * depth_ * frame->channels, 4); // Bytes per second
-            write(depth_ * frame->channels, 2); // Multi-channel sample size
-            write(8 * depth_, 2); // Bits per sample
-            f_ << "data";
-            dataChunk_ = f_.tellp();
-            f_ << "----";
-        } else {
-            write(18, 4); // Chunk size
-            write(3, 2); // Non PCM data
-            write(frame->channels, 2);
-            write(frame->sample_rate, 4);
-            write(frame->sample_rate * depth_ * frame->channels, 4); // Bytes per second
-            write(depth_ * frame->channels, 2); // Multi-channel sample size
-            write(8 * depth_, 2); // Bits per sample
-            write(0, 2); // Extension size
-            f_ << "fact";
-            write(4, 4); // Chunk size
-            factChunk_ = f_.tellp();
-            f_ << "----";
-            f_ << "data";
-            dataChunk_ = f_.tellp();
-            f_ << "----";
-        }
-    }
-
-    ~WavWriter()
-    {
-        length_ = f_.tellp();
-        f_.seekp(dataChunk_);
-        write(length_ - dataChunk_ + 4, 4); // bytes_per_sample * channels * nb_samples
-        f_.seekp(4);
-        write(length_ - 8, 4);
-        if (factChunk_) {
-            f_.seekp(factChunk_);
-            write((length_ - dataChunk_ + 4) / depth_, 4); // channels * nb_samples
-        }
-        f_.flush();
-    }
-
-    template<typename Word>
-    void write(Word value, unsigned size = sizeof(Word))
-    {
-        auto p = reinterpret_cast<unsigned char const*>(&value);
-        for (int i = 0; size; --size, ++i)
-            f_.put(p[i]);
-    }
-
-    void write(AVFrame* frame)
-    {
-        for (int c = 0; c < frame->channels; ++c) {
-            int offset = planar_ ? 0 : depth_ * c;
-            for (int i = 0; i < frame->nb_samples; ++i) {
-                uint8_t* p = &frame->extended_data[planar_ ? c : 0][i + offset];
-                switch (format_) {
-                case AV_SAMPLE_FMT_U8:
-                case AV_SAMPLE_FMT_U8P:
-                    write<uint8_t>(*(uint8_t*) p);
-                    break;
-                case AV_SAMPLE_FMT_S16:
-                case AV_SAMPLE_FMT_S16P:
-                    write<int16_t>(*(int16_t*) p);
-                    break;
-                case AV_SAMPLE_FMT_S32:
-                case AV_SAMPLE_FMT_S32P:
-                    write<int32_t>(*(int32_t*) p);
-                    break;
-                case AV_SAMPLE_FMT_S64:
-                case AV_SAMPLE_FMT_S64P:
-                    write<int64_t>(*(int64_t*) p);
-                    break;
-                case AV_SAMPLE_FMT_FLT:
-                case AV_SAMPLE_FMT_FLTP:
-                    write<float>(*(float*) p);
-                    break;
-                case AV_SAMPLE_FMT_DBL:
-                case AV_SAMPLE_FMT_DBLP:
-                    write<double>(*(double*) p);
-                    break;
-                default:
-                    break;
-                }
-            }
-        }
-        f_.flush();
-    }
-
+    /**
+     * Sets up a wav muxer writing to filename and a PCM encoder configured from the sample
+     * format, channel layout, channel count and sample rate of frame, then writes the header.
+     * @throws std::runtime_error if the sample format is unsupported or any setup step fails;
+     *         whatever was allocated before the failure is released first.
+     */
+    WavWriter(const char* filename, AVFrame* frame)
+    {
+        const auto sample_fmt = static_cast<AVSampleFormat>(frame->format);
+        JAMI_WARNING("WavWriter(): {} ({}, {})", filename, av_get_sample_fmt_name(sample_fmt), frame->sample_rate);
+        try {
+            open(filename, frame, sample_fmt);
+        } catch (...) {
+            // The destructor does not run when a constructor throws, so release here.
+            cleanup(false);
+            throw;
+        }
+    }
+
+    // Copying would make two writers free the same contexts.
+    WavWriter(const WavWriter&) = delete;
+    WavWriter& operator=(const WavWriter&) = delete;
+
+    /** Writes the trailer and releases the encoder, the muxer and the output file. */
+    ~WavWriter() { cleanup(true); }
+
+    /**
+     * Encodes frame and appends the resulting packet(s) to the file.
+     * Failures are logged and the frame is dropped; nothing is thrown.
+     */
+    void write(AVFrame* frame) {
+        if (avcodec_send_frame(codec_ctx_, frame) < 0) {
+            JAMI_ERROR("Error sending a frame to the encoder");
+            return;
+        }
+        // One packet is reused for every receive and freed whatever path leaves the loop.
+        AVPacket* pkt = av_packet_alloc();
+        if (!pkt) {
+            JAMI_ERROR("Error allocating a packet");
+            return;
+        }
+        while (true) {
+            int ret = avcodec_receive_packet(codec_ctx_, pkt);
+            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
+                break;
+            if (ret < 0) {
+                JAMI_ERROR("Error encoding a frame");
+                break;
+            }
+            pkt->stream_index = stream_->index;
+            pkt->pts = lastPts;
+            pkt->dts = lastPts;
+            // Advance by the frame duration expressed in stream time_base units.
+            lastPts += frame->nb_samples * static_cast<int64_t>(stream_->time_base.den)
+                       / (stream_->time_base.num * static_cast<int64_t>(frame->sample_rate));
+            ret = av_write_frame(format_ctx_, pkt);
+            av_packet_unref(pkt);
+            if (ret < 0) {
+                JAMI_ERROR("Error while writing output packet");
+                break;
+            }
+        }
+        av_packet_free(&pkt);
+    }
+
 private:
-    std::ofstream f_;
-    size_t dataChunk_ {0};
-    size_t factChunk_ {0};
-    size_t length_ {0};
-    AVSampleFormat format_ {AV_SAMPLE_FMT_NONE};
-    size_t channels_ {0};
-    bool planar_ {false};
-    int depth_ {0};
-    int stepPerSample_ {0};
+    /** Allocates and opens the muxer, encoder and output file; throws on the first failure. */
+    void open(const char* filename, AVFrame* frame, AVSampleFormat sample_fmt)
+    {
+        avformat_alloc_output_context2(&format_ctx_, nullptr, "wav", filename);
+        if (!format_ctx_)
+            throw std::runtime_error("Failed to allocate output format context");
+
+        // NOTE(review): FFmpeg's PCM encoders presumably accept only packed input, so
+        // multi-channel planar frames may be rejected by avcodec_open2 below -- confirm.
+        AVCodecID codec_id = AV_CODEC_ID_NONE;
+        switch (sample_fmt) {
+        case AV_SAMPLE_FMT_U8:
+        case AV_SAMPLE_FMT_U8P:
+            codec_id = AV_CODEC_ID_PCM_U8;
+            break;
+        case AV_SAMPLE_FMT_S16:
+        case AV_SAMPLE_FMT_S16P:
+            codec_id = AV_CODEC_ID_PCM_S16LE;
+            break;
+        case AV_SAMPLE_FMT_S32:
+        case AV_SAMPLE_FMT_S32P:
+            codec_id = AV_CODEC_ID_PCM_S32LE;
+            break;
+        case AV_SAMPLE_FMT_S64:
+        case AV_SAMPLE_FMT_S64P:
+            codec_id = AV_CODEC_ID_PCM_S64LE;
+            break;
+        case AV_SAMPLE_FMT_FLT:
+        case AV_SAMPLE_FMT_FLTP:
+            codec_id = AV_CODEC_ID_PCM_F32LE;
+            break;
+        case AV_SAMPLE_FMT_DBL:
+        case AV_SAMPLE_FMT_DBLP:
+            codec_id = AV_CODEC_ID_PCM_F64LE;
+            break;
+        default:
+            throw std::runtime_error("Unsupported audio format");
+        }
+
+        auto codec = avcodec_find_encoder(codec_id);
+        if (!codec)
+            throw std::runtime_error("Failed to find audio codec");
+
+        codec_ctx_ = avcodec_alloc_context3(codec);
+        if (!codec_ctx_)
+            throw std::runtime_error("Failed to allocate audio codec context");
+
+        codec_ctx_->sample_fmt = sample_fmt;
+        codec_ctx_->channel_layout = frame->channel_layout;
+        codec_ctx_->sample_rate = frame->sample_rate;
+        codec_ctx_->channels = frame->channels;
+        if (format_ctx_->oformat->flags & AVFMT_GLOBALHEADER)
+            codec_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+        if (avcodec_open2(codec_ctx_, codec, nullptr) < 0)
+            throw std::runtime_error("Failed to open audio codec");
+
+        stream_ = avformat_new_stream(format_ctx_, codec);
+        if (!stream_)
+            throw std::runtime_error("Failed to create audio stream");
+
+        if (avcodec_parameters_from_context(stream_->codecpar, codec_ctx_) < 0)
+            throw std::runtime_error("Failed to copy codec parameters to stream");
+
+        if (!(format_ctx_->oformat->flags & AVFMT_NOFILE)) {
+            if (avio_open(&format_ctx_->pb, filename, AVIO_FLAG_WRITE) < 0) {
+                throw std::runtime_error("Failed to open output file for writing");
+            }
+        }
+        if (avformat_write_header(format_ctx_, nullptr) < 0)
+            throw std::runtime_error("Failed to write header to output file");
+    }
+
+    /**
+     * Releases the encoder and muxer. The trailer is only written when write_trailer is set:
+     * it must be skipped when construction failed before the header was written.
+     */
+    void cleanup(bool write_trailer) noexcept
+    {
+        // avcodec_free_context() also closes the codec, so no separate close is needed.
+        if (codec_ctx_)
+            avcodec_free_context(&codec_ctx_);
+        if (!format_ctx_)
+            return;
+        if (write_trailer)
+            av_write_trailer(format_ctx_);
+        if (format_ctx_->pb && !(format_ctx_->oformat->flags & AVFMT_NOFILE))
+            avio_closep(&format_ctx_->pb);
+        avformat_free_context(format_ctx_);
+        format_ctx_ = nullptr;
+        stream_ = nullptr;
+    }
+
+    AVFormatContext* format_ctx_ {nullptr};
+    AVCodecContext* codec_ctx_ {nullptr};
+    AVStream* stream_ {nullptr};
+    int64_t lastPts {0};
 };
 
 /**