diff --git a/src/call.cpp b/src/call.cpp
index 4fba51b2f1c917bb548c0de962338dfc93d45ba8..ffc3a2757e311bc10d3349836ba7b72485fc0b75 100644
--- a/src/call.cpp
+++ b/src/call.cpp
@@ -680,6 +680,7 @@ Call::setConferenceInfo(const std::string& msg)
         if (not isConferenceParticipant()) {
             // confID_ empty -> participant set confInfo with the received one
             confInfo_ = std::move(newInfo);
+
             // Create sink for each participant
 #ifdef ENABLE_VIDEO
             createSinks(confInfo_);
diff --git a/src/conference.cpp b/src/conference.cpp
index b6d68128714942c14c3de33b988d26676b819f84..4421fef686269f4299b50a49e6249e54dcfc55be 100644
--- a/src/conference.cpp
+++ b/src/conference.cpp
@@ -128,6 +128,7 @@ Conference::Conference(const std::shared_ptr<Account>& account)
                     auto isModerator = shared->isModerator(peerId);
                     auto isHandRaised = shared->isHandRaised(deviceId);
                     auto isModeratorMuted = shared->isMuted(callId);
+                    auto isVoiceActive = shared->isVoiceActive(info.streamId);
                     if (auto videoMixer = shared->videoMixer_)
                         active = videoMixer->verifyActive(info.streamId);
                     newInfo.emplace_back(ParticipantInfo {std::move(uri),
@@ -142,7 +143,8 @@ Conference::Conference(const std::shared_ptr<Account>& account)
                                                           isLocalMuted,
                                                           isModeratorMuted,
                                                           isModerator,
-                                                          isHandRaised});
+                                                          isHandRaised,
+                                                          isVoiceActive});
                 } else {
                     auto isModeratorMuted = false;
                     // If not local
@@ -178,6 +180,7 @@ Conference::Conference(const std::shared_ptr<Account>& account)
                         isLocalMuted = shared->isMediaSourceMuted(MediaType::MEDIA_AUDIO);
                     }
                     auto isHandRaised = shared->isHandRaised(deviceId);
+                    auto isVoiceActive = shared->isVoiceActive(streamId);
                     newInfo.emplace_back(ParticipantInfo {std::move(uri),
                                                           deviceId,
                                                           std::move(streamId),
@@ -190,7 +193,8 @@ Conference::Conference(const std::shared_ptr<Account>& account)
                                                           isLocalMuted,
                                                           isModeratorMuted,
                                                           isModerator,
-                                                          isHandRaised});
+                                                          isHandRaised,
+                                                          isVoiceActive});
                 }
             }
             if (auto videoMixer = shared->videoMixer_) {
@@ -248,6 +252,9 @@ Conference::Conference(const std::shared_ptr<Account>& account)
             if (auto* transport = call->getTransport())
                 setHandRaised(std::string(transport->deviceId()), state);
     });
+
+    parser_.onVoiceActivity(
+        [&](const auto& streamId, bool state) { setVoiceActivity(streamId, state); });
 }
 
 Conference::~Conference()
@@ -1226,6 +1233,51 @@ Conference::setHandRaised(const std::string& deviceId, const bool& state)
     }
 }
 
+bool
+Conference::isVoiceActive(std::string_view streamId) const
+{
+    return streamsVoiceActive.find(streamId) != streamsVoiceActive.end();
+}
+
+void
+Conference::setVoiceActivity(const std::string& streamId, const bool& newState)
+{
+    // verify that streamID exists in our confInfo
+    bool exists = false;
+    for (auto& participant : confInfo_) {
+        if (participant.sinkId == streamId) {
+            exists = true;
+            break;
+        }
+    }
+
+    if (!exists) {
+        JAMI_ERR("participant not found with streamId: %s", streamId.c_str());
+        return;
+    }
+
+    auto previousState = isVoiceActive(streamId);
+
+    if (previousState == newState) {
+        // no change, do not send out updates
+        return;
+    }
+
+    if (newState) {
+        // voice going from inactive to active
+        streamsVoiceActive.emplace(streamId);
+    } else {
+        // voice going from active to inactive
+        streamsVoiceActive.erase(streamId);
+    }
+    updateVoiceActivity();
+}
+
 void
 Conference::setModerator(const std::string& participant_id, const bool& state)
 {
@@ -1268,6 +1320,32 @@ Conference::updateHandsRaised()
     sendConferenceInfos();
 }
 
+void
+Conference::updateVoiceActivity()
+{
+    std::lock_guard<std::mutex> lk(confInfoMutex_);
+
+    // streamId is actually sinkId
+    for (ParticipantInfo& participantInfo : confInfo_) {
+        bool newActivity;
+
+        if (auto call = getCallWith(std::string(string_remove_suffix(participantInfo.uri, '@')),
+                                    participantInfo.device)) {
+            // if this participant is in a direct call with us
+            // grab voice activity info directly from the call
+            newActivity = call->hasPeerVoice();
+        } else {
+            // otherwise use the voice activity state received through the
+            // conference protocol for this stream
+            newActivity = isVoiceActive(participantInfo.sinkId);
+        }
+
+        participantInfo.voiceActivity = newActivity;
+    }
+    sendConferenceInfos(); // also emits signal to client
+}
+
 void
 Conference::foreachCall(const std::function<void(const std::shared_ptr<Call>& call)>& cb)
 {
diff --git a/src/conference.h b/src/conference.h
index bdfef84843b1ae39b5f69fdb518383879f999713..87017762231db3cddd845abeed9cca829cbc518e 100644
--- a/src/conference.h
+++ b/src/conference.h
@@ -59,11 +59,12 @@ class VideoMixer;
 }
 #endif
 
+// Information about one participant stream in a conference
 struct ParticipantInfo
 {
     std::string uri;
     std::string device;
-    std::string sinkId;
+    std::string sinkId; // stream ID
     bool active {false};
     int x {0};
     int y {0};
@@ -74,6 +75,7 @@ struct ParticipantInfo
     bool audioModeratorMuted {false};
     bool isModerator {false};
     bool handRaised {false};
+    bool voiceActivity {false};
 
     void fromJson(const Json::Value& v)
     {
@@ -90,6 +92,7 @@ struct ParticipantInfo
         audioModeratorMuted = v["audioModeratorMuted"].asBool();
         isModerator = v["isModerator"].asBool();
         handRaised = v["handRaised"].asBool();
+        voiceActivity = v["voiceActivity"].asBool();
     }
 
     Json::Value toJson() const
@@ -108,6 +111,7 @@ struct ParticipantInfo
         val["audioModeratorMuted"] = audioModeratorMuted;
         val["isModerator"] = isModerator;
         val["handRaised"] = handRaised;
+        val["voiceActivity"] = voiceActivity;
         return val;
     }
 
@@ -125,7 +129,8 @@ struct ParticipantInfo
                 {"audioLocalMuted", audioLocalMuted ? "true" : "false"},
                 {"audioModeratorMuted", audioModeratorMuted ? "true" : "false"},
                 {"isModerator", isModerator ? "true" : "false"},
-                {"handRaised", handRaised ? "true" : "false"}};
+                {"handRaised", handRaised ? "true" : "false"},
+                {"voiceActivity", voiceActivity ? "true" : "false"}};
     }
 
     friend bool operator==(const ParticipantInfo& p1, const ParticipantInfo& p2)
@@ -135,7 +140,8 @@ struct ParticipantInfo
                and p1.h == p2.h and p1.videoMuted == p2.videoMuted
                and p1.audioLocalMuted == p2.audioLocalMuted
                and p1.audioModeratorMuted == p2.audioModeratorMuted
-               and p1.isModerator == p2.isModerator and p1.handRaised == p2.handRaised;
+               and p1.isModerator == p2.isModerator and p1.handRaised == p2.handRaised
+               and p1.voiceActivity == p2.voiceActivity;
     }
 
     friend bool operator!=(const ParticipantInfo& p1, const ParticipantInfo& p2)
@@ -347,6 +353,7 @@ public:
     void setModerator(const std::string& uri, const bool& state);
     void hangupParticipant(const std::string& accountUri, const std::string& deviceId = "");
     void setHandRaised(const std::string& uri, const bool& state);
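+    // Update the voice activity state of a stream and refresh the conference info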
+    void setVoiceActivity(const std::string& streamId, const bool& newState);
 
     void muteParticipant(const std::string& uri, const bool& state);
     void muteLocalHost(bool is_muted, const std::string& mediaType);
@@ -368,6 +375,8 @@ public:
                     const bool& state);
     void updateMuted();
 
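+    // Recompute the voiceActivity flag of each participant and send the updated ConfInfo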
+    void updateVoiceActivity();
+
     std::shared_ptr<Call> getCallFromPeerID(std::string_view peerId);
 
     /**
@@ -390,6 +399,7 @@ private:
     static std::shared_ptr<Call> getCall(const std::string& callId);
     bool isModerator(std::string_view uri) const;
     bool isHandRaised(std::string_view uri) const;
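+    // Check whether voice is currently marked active for the given stream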
+    bool isVoiceActive(std::string_view streamId) const;
     void updateModerators();
     void updateHandsRaised();
     void muteHost(bool state);
@@ -420,6 +430,9 @@ private:
     std::set<std::string, std::less<>> participantsMuted_ {};
     std::set<std::string, std::less<>> handsRaised_;
 
+    // Streams currently flagged as having voice activity (by stream ID)
+    std::set<std::string, std::less<>> streamsVoiceActive {};
+
     void initRecorder(std::shared_ptr<MediaRecorder>& rec);
     void deinitRecorder(std::shared_ptr<MediaRecorder>& rec);
 
diff --git a/src/conference_protocol.cpp b/src/conference_protocol.cpp
index 78d5d6de482bafa70fa25b2fadb4ae22e79c9427..01cc6a54f112f8311b95cc442739cea616aa02a7 100644
--- a/src/conference_protocol.cpp
+++ b/src/conference_protocol.cpp
@@ -44,6 +44,7 @@ constexpr static const char* ACTIVE = "active";
 constexpr static const char* MUTEAUDIO = "muteAudio";
 // Future
 constexpr static const char* MUTEVIDEO = "muteVideo";
+constexpr static const char* VOICEACTIVITY = "voiceActivity";
 
 } // namespace ProtocolKeys
 
@@ -52,7 +53,8 @@ ConfProtocolParser::parse()
 {
     if (data_.isMember(ProtocolKeys::PROTOVERSION)) {
         uint32_t version = data_[ProtocolKeys::PROTOVERSION].asUInt();
-        if (version_) version_(version);
+        if (version_)
+            version_(version);
         if (version == 1) {
             parseV1();
         } else {
@@ -98,7 +100,7 @@ ConfProtocolParser::parseV0()
     }
     if (data_.isMember(ProtocolKeys::MUTEPART) && data_.isMember(ProtocolKeys::MUTESTATE)) {
         muteParticipant_(data_[ProtocolKeys::MUTEPART].asString(),
-                        data_[ProtocolKeys::MUTESTATE].asString() == TRUE_STR);
+                         data_[ProtocolKeys::MUTESTATE].asString() == TRUE_STR);
     }
     if (data_.isMember(ProtocolKeys::HANGUPPART)) {
         kickParticipant_(data_[ProtocolKeys::HANGUPPART].asString());
@@ -108,8 +110,8 @@ ConfProtocolParser::parseV0()
 void
 ConfProtocolParser::parseV1()
 {
-    if (!checkAuthorization_ || !setLayout_ || !raiseHand_ || !hangupParticipant_ || !muteStreamAudio_
-        || !setActiveStream_) {
+    if (!checkAuthorization_ || !setLayout_ || !raiseHand_ || !hangupParticipant_
+        || !muteStreamAudio_ || !setActiveStream_) {
         JAMI_ERR() << "Missing methods for ConfProtocolParser";
         return;
     }
@@ -138,28 +140,34 @@ ConfProtocolParser::parseV1()
                         hangupParticipant_(accountUri, deviceId);
                     }
                     if (deviceValue.isMember(ProtocolKeys::MEDIAS)) {
-                        for (Json::Value::const_iterator itrm = accValue[ProtocolKeys::MEDIAS].begin();
+                        for (Json::Value::const_iterator itrm = accValue[ProtocolKeys::MEDIAS]
+                                                                    .begin();
                              itrm != accValue[ProtocolKeys::MEDIAS].end();
                              itrm++) {
                             auto streamId = itrm.key().asString();
                             auto mediaVal = *itrm;
+                            if (mediaVal.isMember(ProtocolKeys::VOICEACTIVITY)
+                                && voiceActivity_) {
+                                voiceActivity_(streamId,
+                                               mediaVal[ProtocolKeys::VOICEACTIVITY].asBool());
+                            }
                             if (isPeerModerator) {
-                                if (mediaVal.isMember(ProtocolKeys::MUTEVIDEO) && !muteStreamVideo_) {
+                                if (mediaVal.isMember(ProtocolKeys::MUTEVIDEO)
+                                    && !muteStreamVideo_) {
                                     // Note: For now, it's not implemented so not set
                                     muteStreamVideo_(accountUri,
-                                                deviceId,
-                                                streamId,
-                                                mediaVal[ProtocolKeys::MUTEVIDEO].asBool());
+                                                     deviceId,
+                                                     streamId,
+                                                     mediaVal[ProtocolKeys::MUTEVIDEO].asBool());
                                 }
                                 if (mediaVal.isMember(ProtocolKeys::MUTEAUDIO)) {
                                     muteStreamAudio_(accountUri,
-                                            deviceId,
-                                            streamId,
-                                            mediaVal[ProtocolKeys::MUTEAUDIO].asBool());
+                                                     deviceId,
+                                                     streamId,
+                                                     mediaVal[ProtocolKeys::MUTEAUDIO].asBool());
                                 }
                                 if (mediaVal.isMember(ProtocolKeys::ACTIVE)) {
                                     setActiveStream_(streamId,
-                                                    mediaVal[ProtocolKeys::ACTIVE].asBool());
+                                                     mediaVal[ProtocolKeys::ACTIVE].asBool());
                                 }
                             }
                         }
diff --git a/src/conference_protocol.h b/src/conference_protocol.h
index f43e13074ff16d3d947999183293dbed82b78dca..8d70e6a6f3994afa8895bf4f2eaa4e94e6dcdd67 100644
--- a/src/conference_protocol.h
+++ b/src/conference_protocol.h
@@ -35,19 +35,17 @@ namespace jami {
  * Used to parse confOrder objects
  * @note the user of this class must initialize the different lambdas.
  */
-class ConfProtocolParser {
-
+class ConfProtocolParser
+{
 public:
     ConfProtocolParser() {};
 
-    void onVersion(std::function<void(uint32_t)>&& cb)
-    {
-        version_ = std::move(cb);
-    }
+    void onVersion(std::function<void(uint32_t)>&& cb) { version_ = std::move(cb); }
     /**
      * Ask the caller to check if a peer is authorized (moderator of the conference)
      */
-    void onCheckAuthorization(std::function<bool(std::string_view)>&& cb) {
+    void onCheckAuthorization(std::function<bool(std::string_view)>&& cb)
+    {
         checkAuthorization_ = std::move(cb);
     }
 
@@ -73,10 +71,7 @@ public:
     {
         muteStreamVideo_ = std::move(cb);
     }
-    void onSetLayout(std::function<void(int)>&& cb)
-    {
-        setLayout_ = std::move(cb);
-    }
+    void onSetLayout(std::function<void(int)>&& cb) { setLayout_ = std::move(cb); }
 
     // Version 0, deprecated
     void onKickParticipant(std::function<void(const std::string&)>&& cb)
@@ -95,6 +90,10 @@ public:
     {
         raiseHandUri_ = std::move(cb);
     }
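+    /**
+     * Called when a voice activity change is received for a stream
+     */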
+    void onVoiceActivity(std::function<void(const std::string&, bool)>&& cb)
+    {
+        voiceActivity_ = std::move(cb);
+    }
 
     /**
      * Inject in the parser the data to parse
@@ -133,6 +132,7 @@ private:
     std::function<void(const std::string&)> kickParticipant_;
     std::function<void(const std::string&)> setActiveParticipant_;
     std::function<void(const std::string&, bool)> muteParticipant_;
+    std::function<void(const std::string&, bool)> voiceActivity_;
 };
 
 } // namespace jami
diff --git a/src/media/audio/audio_rtp_session.cpp b/src/media/audio/audio_rtp_session.cpp
index 928117d117bf3143b4bc3a33fde03f204e46df04..478bf4bdd06da32188e93fa720861a63eec7af58 100644
--- a/src/media/audio/audio_rtp_session.cpp
+++ b/src/media/audio/audio_rtp_session.cpp
@@ -68,6 +68,7 @@ AudioRtpSession::~AudioRtpSession()
 void
 AudioRtpSession::startSender()
 {
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
     JAMI_DBG("Start audio RTP sender: input [%s] - muted [%s]",
              input_.c_str(),
              muteState_ ? "YES" : "NO");
@@ -116,13 +117,16 @@ AudioRtpSession::startSender()
     try {
         sender_.reset();
         socketPair_->stopSendOp(false);
-        sender_.reset(
-            new AudioSender(getRemoteRtpUri(), send_, *socketPair_, initSeqVal_, mtu_));
+        sender_.reset(new AudioSender(getRemoteRtpUri(), send_, *socketPair_, initSeqVal_, mtu_));
     } catch (const MediaEncoderException& e) {
         JAMI_ERR("%s", e.what());
         send_.enabled = false;
     }
 
+    if (sender_ && voiceCallback_) {
+        sender_->setVoiceCallback(voiceCallback_);
+    }
+
     // NOTE do after sender/encoder are ready
     auto codec = std::static_pointer_cast<AccountAudioCodecInfo>(send_.codec);
     audioInput_->setFormat(codec->audioformat);
@@ -138,8 +142,9 @@ AudioRtpSession::restartSender()
 {
     std::lock_guard<std::recursive_mutex> lock(mutex_);
     // ensure that start has been called before restart
-    if (not socketPair_)
+    if (not socketPair_) {
         return;
+    }
 
     startSender();
 }
@@ -248,6 +253,16 @@ AudioRtpSession::setMuted(bool muted, Direction)
         audioInput_->setMuted(muted);
 }
 
+void
+AudioRtpSession::setVoiceCallback(std::function<void(bool)> cb)
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex_);
+    voiceCallback_ = std::move(cb);
+    if (sender_) {
+        sender_->setVoiceCallback(voiceCallback_);
+    }
+}
+
 bool
 AudioRtpSession::check_RCTP_Info_RR(RTCPInfo& rtcpi)
 {
diff --git a/src/media/audio/audio_rtp_session.h b/src/media/audio/audio_rtp_session.h
index 9385b26021bbde139a802bc7790c9bc92a38ce8c..28729ac7496cfb941c78654d776c2735edfaa391 100644
--- a/src/media/audio/audio_rtp_session.h
+++ b/src/media/audio/audio_rtp_session.h
@@ -64,6 +64,8 @@ public:
     std::shared_ptr<AudioInput>& getAudioLocal() { return audioInput_; }
     std::unique_ptr<AudioReceiveThread>& getAudioReceive() { return receiveThread_; }
 
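+    // Register a callback invoked when the voice activity of the sent audio changes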
+    void setVoiceCallback(std::function<void(bool)> cb);
+
 private:
     void startSender();
     void startReceiver();
@@ -87,6 +89,8 @@ private:
 
     // Interval in seconds between RTCP checking
     std::chrono::seconds rtcp_checking_interval {4};
+
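+    // Stored so it can be re-attached to the sender whenever it is recreated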
+    std::function<void(bool)> voiceCallback_;
 };
 
 } // namespace jami
diff --git a/src/media/audio/audio_sender.cpp b/src/media/audio/audio_sender.cpp
index 2f1122b4328941b4a25fae53b2fe2eb46e54fb9a..be7728a0a28f8dd215d3383696f187c9d8a3b940 100644
--- a/src/media/audio/audio_sender.cpp
+++ b/src/media/audio/audio_sender.cpp
@@ -92,10 +92,33 @@ AudioSender::update(Observable<std::shared_ptr<jami::MediaFrame>>* /*obs*/,
     auto frame = framePtr->pointer();
     frame->pts = sent_samples;
     sent_samples += frame->nb_samples;
+
+    // report any change in voice activity through the registered callback
+    bool hasVoice = std::static_pointer_cast<AudioFrame>(framePtr)->has_voice;
+    if (hasVoice != voice_) {
+        voice_ = hasVoice;
+        if (voiceCallback_) {
+            voiceCallback_(voice_);
+        } else {
+            JAMI_ERR("AudioSender: no voice activity callback set");
+        }
+    }
+
     if (audioEncoder_->encodeAudio(*std::static_pointer_cast<AudioFrame>(framePtr)) < 0)
         JAMI_ERR("encoding failed");
 }
 
+void
+AudioSender::setVoiceCallback(std::function<void(bool)> cb)
+{
+    if (cb) {
+        voiceCallback_ = std::move(cb);
+    } else {
+        JAMI_ERR("AudioSender trying to set invalid voice callback");
+    }
+}
+
 uint16_t
 AudioSender::getLastSeqValue()
 {
diff --git a/src/media/audio/audio_sender.h b/src/media/audio/audio_sender.h
index 6dc58e3f6f72c27353e784b0d08185a33c782352..9835fcab248a7460d847266af63704b53cca04a7 100644
--- a/src/media/audio/audio_sender.h
+++ b/src/media/audio/audio_sender.h
@@ -47,6 +47,8 @@ public:
     uint16_t getLastSeqValue();
     int setPacketLoss(uint64_t pl);
 
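+    // Set the callback used to report voice activity changes on outgoing frames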
+    void setVoiceCallback(std::function<void(bool)> cb);
+
     void update(Observable<std::shared_ptr<jami::MediaFrame>>*,
                 const std::shared_ptr<jami::MediaFrame>&) override;
 
@@ -67,6 +69,10 @@ private:
     AudioBuffer resampledData_;
     const uint16_t seqVal_;
     uint16_t mtu_;
+
+    // last voice activity state
+    bool voice_ {false};
+    std::function<void(bool)> voiceCallback_;
 };
 
 } // namespace jami
diff --git a/src/media/peerrecorder.h b/src/media/peerrecorder.h
index b7568be6b0159dffe84bf7090627840657fa0d59..70d84ca540bd8a527dba95d41a5015737deda072 100644
--- a/src/media/peerrecorder.h
+++ b/src/media/peerrecorder.h
@@ -41,9 +41,14 @@ public:
 
     virtual bool isPeerMuted() const { return peerMuted_; }
 
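+    // Called when the peer signals a change in its voice activity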
+    virtual void peerVoice(bool voice) = 0;
+
+    virtual bool hasPeerVoice() const { return peerVoice_; }
+
 protected:
     bool peerRecording_ {false};
     bool peerMuted_ {false};
+    bool peerVoice_ {false};
 };
 
 } // namespace jami
diff --git a/src/sip/sipcall.cpp b/src/sip/sipcall.cpp
index eab843037dc9539da11614698c305aa2058dc942..47a74dc8f3a60facf1157c67428f9e40bb27b25e 100644
--- a/src/sip/sipcall.cpp
+++ b/src/sip/sipcall.cpp
@@ -234,6 +234,10 @@ SIPCall::configureRtpSession(const std::shared_ptr<RtpSession>& rtpSession,
             thisPtr->rtpSetupSuccess(type, isRemote);
     });
 
+    if (localMedia.type == MediaType::MEDIA_AUDIO) {
+        setupVoiceCallback(rtpSession);
+    }
+
 #ifdef ENABLE_VIDEO
     if (localMedia.type == MediaType::MEDIA_VIDEO) {
         auto videoRtp = std::dynamic_pointer_cast<video::VideoRtpSession>(rtpSession);
@@ -255,6 +259,51 @@ SIPCall::configureRtpSession(const std::shared_ptr<RtpSession>& rtpSession,
 #endif
 }
 
+void
+SIPCall::setupVoiceCallback(const std::shared_ptr<RtpSession>& rtpSession)
+{
+    // need to downcast to access setVoiceCallback
+    auto audioRtp = std::dynamic_pointer_cast<AudioRtpSession>(rtpSession);
+    if (not audioRtp) {
+        JAMI_ERR("[call:%s] Voice callback setup failed: no audio RTP session",
+                 getCallId().c_str());
+        return;
+    }
+
+    audioRtp->setVoiceCallback([w = weak()](bool voice) {
+        // this is called whenever the local voice activity state changes
+
+        runOnMainThread([w, voice] {
+            if (auto thisPtr = w.lock()) {
+                // TODO: once we support multiple streams, change this to the right one
+                std::string streamId;
+
+                if (not jami::getVideoDeviceMonitor().getDeviceList().empty()) {
+                    // if we have a video device
+                    streamId = sip_utils::streamId("", sip_utils::DEFAULT_VIDEO_STREAMID);
+                } else {
+                    // no video
+                    streamId = "";
+                }
+
+                // send our local voice activity
+                if (auto conference = thisPtr->conf_.lock()) {
+                    // we are in a conference
+
+                    // updates conference info and sends it to others via ConfInfo
+                    // (only if there was a change)
+                    // also emits signal with updated conference info
+                    conference->setVoiceActivity(streamId, voice);
+                } else {
+                    // we are in a one-to-one call
+                    // send voice activity over SIP
+                    // TODO: change the streamID once multiple streams are supported
+                    thisPtr->sendVoiceActivity("-1", voice);
+
+                    // TODO: maybe emit signal here for local voice activity
+                }
+            } else {
+                JAMI_ERR("voice activity callback unable to lock weak ptr to SIPCall");
+            }
+        });
+    });
+}
+
 std::shared_ptr<SIPAccountBase>
 SIPCall::getSIPAccount() const
 {
@@ -641,6 +690,31 @@ SIPCall::sendMuteState(bool state)
     }
 }
 
+void
+SIPCall::sendVoiceActivity(std::string_view streamId, bool state)
+{
+    // don't include the stream_id tag when the id is "-1" (unspecified)
+    std::string streamIdPart = "";
+    if (streamId != "-1" && !streamId.empty()) {
+        streamIdPart = fmt::format("<stream_id>{}</stream_id>", streamId);
+    }
+
+    std::string BODY = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>"
+                       "<media_control><vc_primitive>"
+                       + streamIdPart
+                       + "<to_encoder>"
+                         "<voice_activity="
+                       + std::to_string(state)
+                       + "/>"
+                         "</to_encoder></vc_primitive></media_control>";
+
+    try {
+        sendSIPInfo(BODY, "media_control+xml");
+    } catch (const std::exception& e) {
+        JAMI_ERR("Error sending voice activity state: %s", e.what());
+    }
+}
+
 void
 SIPCall::setInviteSession(pjsip_inv_session* inviteSession)
 {
@@ -2249,7 +2323,7 @@ SIPCall::updateMediaStream(const MediaAttribute& newMediaAttr, size_t streamIdx)
     auto const& mediaAttr = rtpStream.mediaAttribute_;
     assert(mediaAttr);
 
-    bool notify = false;
+    bool notifyMute = false;
 
     if (newMediaAttr.muted_ == mediaAttr->muted_) {
         // Nothing to do. Already in the desired state.
@@ -2261,7 +2335,7 @@ SIPCall::updateMediaStream(const MediaAttribute& newMediaAttr, size_t streamIdx)
     } else {
         // Update
         mediaAttr->muted_ = newMediaAttr.muted_;
-        notify = true;
+        notifyMute = true;
         JAMI_DBG("[call:%s] %s [%s]",
                  getCallId().c_str(),
                  mediaAttr->muted_ ? "muting" : "un-muting",
@@ -2272,7 +2346,7 @@ SIPCall::updateMediaStream(const MediaAttribute& newMediaAttr, size_t streamIdx)
     if (not newMediaAttr.sourceUri_.empty())
         mediaAttr->sourceUri_ = newMediaAttr.sourceUri_;
 
-    if (notify and mediaAttr->type_ == MediaType::MEDIA_AUDIO) {
+    if (notifyMute and mediaAttr->type_ == MediaType::MEDIA_AUDIO) {
         rtpStream.rtpSession_->setMediaSource(mediaAttr->sourceUri_);
         rtpStream.rtpSession_->setMuted(mediaAttr->muted_);
         sendMuteState(mediaAttr->muted_);
@@ -2282,7 +2356,7 @@ SIPCall::updateMediaStream(const MediaAttribute& newMediaAttr, size_t streamIdx)
     }
 
 #ifdef ENABLE_VIDEO
-    if (notify and mediaAttr->type_ == MediaType::MEDIA_VIDEO) {
+    if (notifyMute and mediaAttr->type_ == MediaType::MEDIA_VIDEO) {
         rtpStream.rtpSession_->setMediaSource(mediaAttr->sourceUri_);
         rtpStream.rtpSession_->setMuted(mediaAttr->muted_);
 
@@ -3427,6 +3501,19 @@ SIPCall::peerMuted(bool muted)
         conf->updateMuted();
 }
 
+void
+SIPCall::peerVoice(bool voice)
+{
+    peerVoice_ = voice;
+
+    if (auto conference = conf_.lock()) {
+        conference->updateVoiceActivity();
+    } else {
+        // one-to-one call
+        // maybe emit signal with partner voice activity
+    }
+}
+
 void
 SIPCall::resetMediaReady()
 {
diff --git a/src/sip/sipcall.h b/src/sip/sipcall.h
index ad164eef36c6bd08c714fc582ac2dccc1dc9e723..dc7b74d559c9dae40b5f37a746908e5ca42ce9a9 100644
--- a/src/sip/sipcall.h
+++ b/src/sip/sipcall.h
@@ -165,9 +165,13 @@ public:
 
     // Override of Recordable class
     bool toggleRecording() override;
+    // End of override of Recordable class
+
+    // Override of PeerRecorder class
     void peerRecording(bool state) override;
     void peerMuted(bool state) override;
-    // End of override of Recordable class
+    void peerVoice(bool state) override;
+    // End of override of PeerRecorder class
 
     void monitor() const override;
 
@@ -324,7 +328,10 @@ private:
 
     void rtpSetupSuccess(MediaType type, bool isRemote);
 
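+    // Attach a voice activity callback to the audio RTP session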
+    void setupVoiceCallback(const std::shared_ptr<RtpSession>& rtpSession);
+
     void sendMuteState(bool state);
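+    // Notify the peer of our voice activity state via a SIP INFO message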
+    void sendVoiceActivity(std::string_view streamId, bool state);
 
     void resetTransport(std::shared_ptr<IceTransport>&& transport);
 
diff --git a/src/sip/sipvoiplink.cpp b/src/sip/sipvoiplink.cpp
index 95cf5ccb3455bc291b77682a0d0e49e489824761..954009c57aaf727c6005fc4ceefa35d389642d2b 100644
--- a/src/sip/sipvoiplink.cpp
+++ b/src/sip/sipvoiplink.cpp
@@ -1173,6 +1173,7 @@ handleMediaControl(SIPCall& call, pjsip_msg_body* body)
         static constexpr auto DEVICE_ORIENTATION = "device_orientation"sv;
         static constexpr auto RECORDING_STATE = "recording_state"sv;
         static constexpr auto MUTE_STATE = "mute_state"sv;
+        static constexpr auto VOICE_ACTIVITY = "voice_activity"sv;
 
         int streamIdx = -1;
         if (body_msg.find(STREAM_ID) != std::string_view::npos) {
@@ -1243,6 +1244,20 @@ handleMediaControl(SIPCall& call, pjsip_msg_body* body)
                 }
                 return true;
             }
+        } else if (body_msg.find(VOICE_ACTIVITY) != std::string_view::npos) {
+            static const std::regex VOICE_REGEX("voice_activity=([0-1])");
+            std::svmatch matched_pattern;
+            std::regex_search(body_msg, matched_pattern, VOICE_REGEX);
+
+            if (matched_pattern.ready() && !matched_pattern.empty() && matched_pattern[1].matched) {
+                try {
+                    bool state = std::stoi(matched_pattern[1]);
+                    call.peerVoice(state);
+                } catch (const std::exception& e) {
+                    JAMI_WARN("Error parsing remote voice activity state: %s", e.what());
+                }
+                return true;
+            }
         }
     }