diff --git a/src/media/media_encoder.cpp b/src/media/media_encoder.cpp
index 37a9377c06f5a05f1630d10989db129c0ee592df..bedecd9099b1dee391a4c9e922f3500992bc3788 100644
--- a/src/media/media_encoder.cpp
+++ b/src/media/media_encoder.cpp
@@ -469,9 +469,13 @@ MediaEncoder::encode(AVFrame* frame, int streamIdx)
     if (!initialized_ && frame) {
         // Initialize on first video frame, or first audio frame if no video stream
         bool isVideo = (frame->width > 0 && frame->height > 0);
-        if (isVideo or not videoOpts_.isValid()) {
+        if (isVideo and videoOpts_.isValid()) {
+            // Has video stream, so init with video frame
             streamIdx = initStream(videoCodec_, frame->hw_frames_ctx);
             startIO();
+        } else if (!isVideo and !videoOpts_.isValid()) {
+            // Only audio, for MediaRecorder, which doesn't use encodeAudio
+            startIO();
         } else {
             return 0;
         }