AudioRtpSession.h 18.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 *  Copyright (C) 2009 Savoir-Faire Linux inc.
 *  Author: Pierre-Luc Bacon <pierre-luc.bacon@savoirfairelinux.com>
 *  Author: Alexandre Bourget <alexandre.bourget@savoirfairelinux.com>
 *  Author: Laurielle Lea <laurielle.lea@savoirfairelinux.com>
 *  Author: Emmanuel Milou <emmanuel.milou@savoirfairelinux.com>
 *  Author: Yan Morin <yan.morin@savoirfairelinux.com>
 *  Author: Alexandre Savard <alexandre.savard@savoirfairelinux.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
23 24
#ifndef __SFL_AUDIO_RTP_SESSION_H__
#define __SFL_AUDIO_RTP_SESSION_H__
25 26 27 28 29

#include <iostream>
#include <exception>

#include "global.h"
30

31
#include "sip/sipcall.h"
32
#include "sip/sdp.h"
33
#include "audio/audiolayer.h"
34 35 36
#include "audio/codecs/audiocodec.h"
#include "audio/samplerateconverter.h"
#include "managerimpl.h"
37

38
#include <ccrtp/rtp.h>
39
#include <cc++/numbers.h> // ost::Time
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60

namespace sfl {

    /**
     * Scheduling timeout handed to the RTP session by setSessionTimeouts().
     * NOTE(review): presumably microseconds (ccRTP convention) - confirm.
     */
    static const int schedulingTimeout = 100 * 1000;

    /**
     * Expiry timeout handed to the RTP session by setSessionTimeouts().
     * NOTE(review): presumably microseconds (ccRTP convention) - confirm.
     */
    static const int expireTimeout = 1000 * 1000;
    
    /**
     * Exception thrown by AudioRtpSession when the session cannot be set up
     * (no audio layer, no codec, no SIP call, timeouts rejected).
     */
    class AudioRtpSessionException: public std::exception
    {
      public:
        /**
         * Describe the failure.
         * Declared public so the message can be read through the concrete
         * type as well as through a std::exception reference.
         * @return A static, NUL-terminated description string
         */
        virtual const char* what() const throw()
        {
            return "AudioRtpSessionException occured";
        }
    };
    
    /**
     * Audio side of an RTP session, run in its own thread.
     *
     * D is the derived class: the implementation casts `this` to D* to reach
     * the RTP session calls it needs (setPayloadFormat, addDestination,
     * putData, getData, startRunning, ...).
     */
    template <typename D>
    class AudioRtpSession : public ost::Thread, public ost::TimerPort {
        public:
            /**
             * Constructor
             * @param manager The manager instance (gives access to the audio driver)
             * @param sipcall The pointer on the SIP call
             */
            AudioRtpSession (ManagerImpl * manager, SIPCall* sipcall);

            ~AudioRtpSession();

            /** Thread body: configures the session, then loops sending and receiving audio. */
            virtual void run ();

            /**
             * Start the thread.
             * @return The value returned by the thread's start() call
             */
            int startRtpThread();

        private:

            /** Allocate the sample rate converter and the mic/speaker work buffers. */
            void initBuffers();

            /** Apply schedulingTimeout and expireTimeout to the RTP session. */
            void setSessionTimeouts();

            /** Instantiate the negotiated codec and set the RTP payload format. */
            void setSessionMedia();

            /** Register the remote IP address and audio port as RTP destination. */
            void setDestinationIpAddress();

            /**
             * Fetch mic data, resample it if needed and encode it into _micDataEncoded.
             * @return The value returned by the codec's encode call, 0 when no mic data is available
             */
            int processDataEncode();

            /**
             * Decode a received payload, resample it if needed and hand it to the main buffer.
             * @param spkrData  Encoded payload
             * @param size      Payload size in bytes
             * @param countTime Counter used to pace the "incoming call waiting" notification
             */
            void processDataDecode(unsigned char * spkrData, unsigned int size, int& countTime);

            /**
             * Duration of one codec frame.
             * @param codecSamplePerFrame Samples per codec frame
             * @param codecClockRate      Codec clock rate in Hz
             * @return Frame duration in milliseconds
             */
            float computeCodecFrameSize (int codecSamplePerFrame, int codecClockRate) {
                const float samples = static_cast<float> (codecSamplePerFrame);
                const float rate = static_cast<float> (codecClockRate);
                return (samples * 1000.0) / rate;
            }

            /**
             * Number of bytes occupied by codecFrameSize milliseconds of audio
             * at the current converterSamplingRate.
             * @param codecFrameSize Frame duration in milliseconds
             * @return Size in bytes, truncated to an integer
             */
            int computeNbByteAudioLayer (float codecFrameSize) {
                const float rate = static_cast<float> (converterSamplingRate);
                return static_cast<int> ( (rate * codecFrameSize * sizeof (SFLDataFormat)) / 1000.0);
            }

            /** Encode pending mic data and queue it on the RTP session. */
            void sendMicData(int timestamp);

            /** Pull one received packet from the RTP session and decode it. */
            void receiveSpeakerData (int& countTime);

            /** Time object; its getSecond() value is added to timestamps and to the beep counter. */
            ost::Time * _time;

            // This semaphore is not used, but it is needed to avoid an
            // ambiguous overload at compile time. It is set to 0 and, being
            // optional in ost::Thread, that amounts to calling start()
            // with no semaphore at all.
            ost::Semaphore * _mainloopSemaphore;

            /** Codec used to encode mic data and decode speaker data */
            AudioCodec * _audiocodec;

            /** Audio layer obtained from the manager */
            AudioLayer * _audiolayer;

            /** Mic-data related buffers */
            SFLDataFormat* _micData;
            SFLDataFormat* _micDataConverted;
            unsigned char* _micDataEncoded;

            /** Speaker-data related buffers */
            SFLDataFormat* _spkrDataDecoded;
            SFLDataFormat* _spkrDataConverted;

            /** Sample rate converter object */
            SamplerateConverter * _converter;

            /** Sample rate used by the audio layer to play sound (typically 44100 Hz) */
            int _layerSampleRate;

            /** Sample rate of the codec used to encode and decode (most of the time 8000 Hz) */
            int _codecSampleRate;

            /** Length of the captured sound frame in ms (typically 20 ms) */
            int _layerFrameSize;

            /** Codec frame size in samples (20 ms => 882 at 44.1 kHz).
                The exact value is stored in the codec */
            int _codecFrameSize;

            /** Speaker buffer length in samples once the data are resampled
             *  (used for mixing and recording)
             */
            int _nSamplesSpkr;

            /** Mic buffer length in samples once the data are resampled
             *  (used for mixing and recording)
             */
            int _nSamplesMic;

            /**
             * Maximum number of samples for audio buffers (mic and spkr)
             */
            int _nbSamplesMax;

            /**
             * Manager instance.
             */
            ManagerImpl * _manager;

            /** Internal sampling rate of the main buffer, refreshed on every loop iteration */
            int converterSamplingRate;

        protected:
            /** The SIP call this session belongs to */
            SIPCall * _ca;

    };
    
    template <typename D>
159
    AudioRtpSession<D>::AudioRtpSession(ManagerImpl * manager, SIPCall * sipcall) :
160 161 162 163 164 165 166 167 168 169 170 171 172
     _time (new ost::Time()), 
     _mainloopSemaphore(0),
     _audiocodec (NULL),
     _audiolayer (NULL),
     _ca (sipcall), 
     _micData (NULL), 
     _micDataConverted (NULL), 
     _micDataEncoded (NULL), 
     _spkrDataDecoded (NULL), 
     _spkrDataConverted (NULL),
     _converter (NULL),
     _layerSampleRate(0),
     _codecSampleRate(0), 
173 174
     _layerFrameSize(0),
     _manager(manager)
175 176 177 178 179 180 181 182
    {
        setCancel (cancelDefault);

        assert(_ca);
        
        _debug ("Local audio port %i will be used\n", _ca->getLocalAudioPort());

        //mic, we receive from soundcard in stereo, and we send encoded
183
        _audiolayer = _manager->getAudioDriver();
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
        
        if (_audiolayer == NULL) { throw AudioRtpSessionException(); }
        
        _layerFrameSize = _audiolayer->getFrameSize(); // in ms
        _layerSampleRate = _audiolayer->getSampleRate();

    }
    
    /**
     * Stop the thread, unbind the call from the main buffer and release
     * the buffers, the time object and the sample rate converter.
     */
    template <typename D>
    AudioRtpSession<D>::~AudioRtpSession()
    {
        _debug ("Delete AudioRtpSession instance\n");

        // The thread must be stopped before anything it uses is released.
        // NOTE(review): the rethrow leaves the destructor through an
        // exception and skips all the cleanup below - confirm whether it is
        // required (e.g. for thread cancellation) before changing it.
        try {
            terminate();
        } catch (...) {
            _debugException ("Thread destructor didn't terminate correctly");
            throw;
        }

        // Detach this call from the main buffer
        _debug("Unbind audio RTP stream for call id %s\n", _ca->getCallId().c_str());
        _audiolayer->getMainBuffer()->unBindAll(_ca->getCallId());

        // Mic-side buffers
        delete [] _micData;
        delete [] _micDataConverted;
        delete [] _micDataEncoded;

        // Speaker-side buffers
        delete [] _spkrDataDecoded;
        delete [] _spkrDataConverted;

        // Helper objects
        delete _time;
        delete _converter;

        _debug ("AudioRtpSession instance deleted\n");
    }
    
    /**
     * Configure the main buffer sampling rate, create the sample rate
     * converter and allocate the mic/speaker work buffers.
     *
     * The buffers are sized from the highest sampling rate involved (codec,
     * audio layer or main buffer): _micData and _spkrDataConverted hold audio
     * at the main buffer rate, which may exceed the codec rate. The size is
     * stored in _nbSamplesMax.
     */
    template <typename D>
    void AudioRtpSession<D>::initBuffers()
    {
        // Set sampling rate, main buffer choose the highest one
        _audiolayer->getMainBuffer()->setInternalSamplingRate(_codecSampleRate);

        // may be different from the rate requested just above
        converterSamplingRate = _audiolayer->getMainBuffer()->getInternalSamplingRate();

        // initialize SampleRate converter using AudioLayer's sampling rate
        // (internal buffers initialized with maximal sampling rate and frame size)
        _converter = new SamplerateConverter(_layerSampleRate, _layerFrameSize);

        // Highest rate any of the buffers may have to hold
        int maxSampleRate = _codecSampleRate;

        if (_layerSampleRate > maxSampleRate) {
            maxSampleRate = _layerSampleRate;
        }

        if (converterSamplingRate > maxSampleRate) {
            maxSampleRate = converterSamplingRate;
        }

        // One layer frame at the highest rate, doubled as in the original
        // sizing to keep some headroom.
        _nbSamplesMax = (int)(maxSampleRate * _layerFrameSize / 1000) * 2;

        _micData = new SFLDataFormat[_nbSamplesMax];
        _micDataConverted = new SFLDataFormat[_nbSamplesMax];
        _micDataEncoded = new unsigned char[_nbSamplesMax];
        _spkrDataConverted = new SFLDataFormat[_nbSamplesMax];
        _spkrDataDecoded = new SFLDataFormat[_nbSamplesMax];

        _manager->addStream(_ca->getCallId());
    }
    
    /**
     * Apply the scheduling and expiry timeouts to the RTP session.
     * @throws AudioRtpSessionException when either setter throws
     */
    template <typename D>
    void AudioRtpSession<D>::setSessionTimeouts(void)
    {
        // The RTP session API lives in the derived class
        D * const session = static_cast<D*>(this);

        try {
            session->setSchedulingTimeout (schedulingTimeout);
            session->setExpireTimeout (expireTimeout);
        } catch (...) {
            // Any failure is reported as a session exception
            _debugException ("Initialization failed while setting timeouts");
            throw AudioRtpSessionException();
        }
    }
    
    /**
     * Instantiate the codec matching the payload of the local SDP session
     * media, record its clock rate and frame size, and set the RTP payload
     * format accordingly.
     * @throws AudioRtpSessionException when no codec can be instantiated
     */
    template <typename D>
    void AudioRtpSession<D>::setSessionMedia(void)
    {
        assert(_ca);

        AudioCodecType pl = (AudioCodecType)_ca->getLocalSDP()->get_session_media()->getPayload();
        _audiocodec = _manager->getCodecDescriptorMap().instantiateCodec(pl);

        if (_audiocodec == NULL) {
            _debug ("No audiocodec, can't init RTP media\n");
            throw AudioRtpSessionException();
        }

        _debug ("Init audio RTP session: codec payload %i\n", _audiocodec->getPayload());

        _codecSampleRate = _audiocodec->getClockRate();
        _codecFrameSize = _audiocodec->getFrameSize();

        D * const session = static_cast<D*>(this);

        //TODO: figure out why this is necessary.
        // Payload 9 (G722) gets a dynamic payload format, like the codecs
        // that really have a dynamic payload; only the log line differs.
        const bool isG722 = (_audiocodec->getPayload() == 9);

        if (isG722 || _audiocodec->hasDynamicPayload()) {
            if (isG722) {
                _debug ("Setting payload format to G722\n");
            } else {
                _debug ("Setting a dynamic payload format\n");
            }

            session->setPayloadFormat (ost::DynamicPayloadFormat ( (ost::PayloadType) _audiocodec->getPayload(), _audiocodec->getClockRate()));
        } else {
            _debug ("Setting a static payload format\n");
            session->setPayloadFormat (ost::StaticPayloadFormat ( (ost::StaticPayloadType) _audiocodec->getPayload()));
        }
    }
    
    /**
     * Add the remote IP address and audio port found in the local SDP as
     * destination of the RTP session.
     *
     * An invalid address or a rejected destination is only logged; the
     * function then returns without adding anything.
     *
     * @throws AudioRtpSessionException when there is no SIP call
     */
    template <typename D>
    void AudioRtpSession<D>::setDestinationIpAddress(void)
    {
        if (!_ca) {
            _debug ("Sipcall is gone.\n");
            throw AudioRtpSessionException();
        }

        _debug ("Setting IP address for the RTP session\n");

        ost::InetHostAddress remoteAddress (_ca->getLocalSDP()->get_remote_ip().c_str());
        _debug ("Init audio RTP session: remote ip %s\n", _ca->getLocalSDP()->get_remote_ip().data());

        // The address object tests false when it is not usable
        if (!remoteAddress) {
            _debug ("Target IP address [%s] is not correct!\n", _ca->getLocalSDP()->get_remote_ip().data());
            return;
        }

        const unsigned short remotePort = (unsigned short) _ca->getLocalSDP()->get_remote_audio_port();

        if (static_cast<D*>(this)->addDestination (remoteAddress, remotePort)) {
            return;
        }

        _debug ("Can't add destination to session!\n");
    }
    
    /**
     * Fetch up to one codec frame of mic data from the main buffer,
     * downsample it when the codec rate differs from the main buffer rate,
     * and encode it into _micDataEncoded.
     *
     * _nSamplesMic is updated with the number of samples read into _micData
     * whether or not resampling takes place, so that the recording done by
     * run() always sees the count matching the current content of _micData.
     *
     * @return The value returned by the codec's encode call (bytes to send
     *         over RTP), or 0 when no mic data is available
     */
    template <typename D>
    int AudioRtpSession<D>::processDataEncode(void)
    {
        assert(_audiocodec);
        assert(_audiolayer);

        int mainBufferSampleRate = _audiolayer->getMainBuffer()->getInternalSamplingRate();

        // compute codec framesize in ms
        float fixed_codec_framesize = computeCodecFrameSize (_audiocodec->getFrameSize(), _audiocodec->getClockRate());

        // number of bytes covering one codec frame at the main buffer rate
        int maxBytesToGet = computeNbByteAudioLayer (fixed_codec_framesize);

        // available bytes inside ringbuffer
        int availBytesFromMic = _audiolayer->getMainBuffer()->availForGet(_ca->getCallId());

        // never fetch more than one codec frame
        int bytesAvail = (availBytesFromMic < maxBytesToGet) ? availBytesFromMic : maxBytesToGet;

        if (bytesAvail == 0)
            return 0;

        // Get bytes from the main buffer into _micData
        int nbSample = _audiolayer->getMainBuffer()->getData(_micData , bytesAvail, 100, _ca->getCallId()) / sizeof (SFLDataFormat);

        // Store the number of mic samples for recording, on both paths
        _nSamplesMic = nbSample;

        // test if resampling is required
        if (_audiocodec->getClockRate() != mainBufferSampleRate) {
            int nb_sample_up = nbSample;
            nbSample = _converter->downsampleData (_micData , _micDataConverted , _audiocodec->getClockRate(), mainBufferSampleRate, nb_sample_up);

            return _audiocodec->codecEncode (_micDataEncoded, _micDataConverted, nbSample*sizeof (int16));
        }

        // no resampling required
        return _audiocodec->codecEncode (_micDataEncoded, _micData, nbSample*sizeof (int16));
    }
    
    /**
     * Decode one received payload, upsample it when the codec rate differs
     * from the main buffer rate, and put the result in the main buffer.
     * Also advances countTime and, while another call is waiting, triggers
     * the incoming call notification each time the counter wraps to 0.
     *
     * @param spkrData  Encoded payload
     * @param size      Payload size in bytes
     * @param countTime Counter updated in place
     */
    template <typename D>
    void AudioRtpSession<D>::processDataDecode(unsigned char * spkrData, unsigned int size, int& countTime)
    {
        // Without a codec only the counter moves forward
        if (_audiocodec == NULL) {
            countTime += _time->getSecond();
            return;
        }

        int mainBufferSampleRate = _audiolayer->getMainBuffer()->getInternalSamplingRate();

        // Size of the decoded data, in bytes
        int expandedSize = _audiocodec->codecDecode (_spkrDataDecoded , spkrData , size);

        // Decoded samples are SFLDataFormat values
        int nbSample = expandedSize / sizeof (SFLDataFormat);

        if (_audiocodec->getClockRate() != mainBufferSampleRate) {
            // Rates differ: convert from the codec rate to the main buffer rate
            int decodedSamples = nbSample;
            nbSample = _converter->upsampleData (_spkrDataDecoded, _spkrDataConverted, _codecSampleRate, mainBufferSampleRate, decodedSamples);

            // Sample count kept for recording.
            // NOTE(review): this is the upsampled count, while run() records
            // from _spkrDataDecoded - confirm which buffer/count pair is meant.
            _nSamplesSpkr = nbSample;

            // Size passed to the main buffer is in bytes
            _audiolayer->getMainBuffer()->putData (_spkrDataConverted, nbSample * sizeof (SFLDataFormat), 100, _ca->getCallId());
        } else {
            // Same rate: the decoded data goes to the main buffer as is
            _nSamplesSpkr = nbSample;

            _audiolayer->getMainBuffer()->putData (_spkrDataDecoded, expandedSize, 100, _ca->getCallId());
        }

        // Notify (with a beep) an incoming call when there is already a call
        countTime += _time->getSecond();

        if (_manager->incomingCallWaiting() > 0) {
            countTime %= 500; // more often...

            if (countTime == 0) {
                _manager->notificationIncomingCall();
            }
        }
    }
    
    /**
     * Encode the pending mic data and queue it on the RTP session.
     *
     * Steps: get data from the mic, convert it to the codec rate if needed,
     * encode it, then send it.
     *
     * @param timestamp RTP timestamp for this packet (local copy, adjusted
     *                  by _time->getSecond() before use)
     */
    template <typename D>
    void AudioRtpSession<D>::sendMicData(int timestamp)
    {
        timestamp += _time->getSecond();

        if (_audiolayer == NULL) {
            _debug ("No audiolayer available for MIC\n");
            return;
        }

        if (_audiocodec == NULL) {
            _debug ("No audiocodec available for MIC\n");
            return;
        }

        const int encodedBytes = processDataEncode();

        // putData put the data on RTP queue, sendImmediate bypass this queue
        static_cast<D*>(this)->putData (timestamp, _micDataEncoded, encodedBytes);
    }
    
    
    /**
     * Fetch the packet at the first available timestamp from the RTP
     * session and decode it. Returns without doing anything when the audio
     * layer or codec is missing, or when no packet is available.
     *
     * @param countTime Counter forwarded to processDataDecode()
     */
    template <typename D>
    void AudioRtpSession<D>::receiveSpeakerData (int& countTime)
    {
        if (!_audiolayer) {
            _debug ("No audiolayer available for speaker\n");
            return;
        }

        if (!_audiocodec) {
            _debug ("No audiocodec available for speaker\n");
            return;
        }

        const ost::AppDataUnit* adu = NULL;

        adu = static_cast<D*>(this)->getData(static_cast<D*>(this)->getFirstTimestamp());

        if (adu == NULL) {
            // _debug("No RTP audio stream\n");
            return;
        }

        unsigned char* spkrData  = (unsigned char*) adu->getData(); // data in char

        unsigned int size = adu->getSize(); // size in char

        processDataDecode (spkrData, size, countTime);

        // The data unit returned by getData() belongs to the caller; the
        // payload has been consumed by the decoder above, so release it
        // instead of leaking one unit per received packet.
        delete adu;
    }
    
    /**
     * Start the session thread, passing the (null) main loop semaphore.
     * @return The value returned by start()
     */
    template <typename D>
    int AudioRtpSession<D>::startRtpThread ()
    {
        _debug("Starting main thread\n");

        const int status = start(_mainloopSemaphore);
        return status;
    }
    
    /**
     * Thread body: configure the session (timeouts, destination, codec,
     * buffers), start the audio stream and the RTP session, then loop until
     * cancellation, sending mic data, receiving speaker data and recording
     * the call on every cycle.
     *
     * @throws AudioRtpSessionException when a setup step fails or when the
     *         audio layer is missing
     */
    template <typename D>
    void AudioRtpSession<D>::run ()
    {

        setSessionTimeouts();
        setDestinationIpAddress();
        setSessionMedia();

        // Check the audio layer before initBuffers() dereferences it
        if (_audiolayer == NULL) {
            _debug("For some unknown reason, audiolayer is null, just as \
            we were about to start the audio stream\n");
            throw AudioRtpSessionException();
        }

        initBuffers();

        // Nothing has been captured or received yet: make sure the first
        // recording cycles see defined sample counts.
        _nSamplesMic = 0;
        _nSamplesSpkr = 0;

        int sessionWaiting;
        int timestep = _codecFrameSize;
        int timestamp = static_cast<D*>(this)->getCurrentTimestamp(); // for mic
        int countTime = 0; // for receive
        int threadSleep = 0;

        // One codec frame in ms; fall back on the layer frame size when the
        // codec rate is unknown
        if (_codecSampleRate != 0)
            { threadSleep = (_codecFrameSize * 1000) / _codecSampleRate; }
        else
            { threadSleep = _layerFrameSize; }

        TimerPort::setTimer (threadSleep);

        _ca->setRecordingSmplRate(_audiocodec->getClockRate());

        _audiolayer->startStream();
        static_cast<D*>(this)->startRunning();

        _debug ("Entering RTP mainloop for callid %s\n",_ca->getCallId().c_str());

        while (!testCancel()) {

            // The main buffer rate may change while the call is running
            converterSamplingRate = _audiolayer->getMainBuffer()->getInternalSamplingRate();

            // Send session
            sessionWaiting = static_cast<D*>(this)->isWaiting();

            sendMicData (timestamp);
            timestamp += timestep;

            // Recv session
            receiveSpeakerData (countTime);

            if (sessionWaiting == 1) {
                // Record mic and speaker during conversation
                _ca->recAudio.recData (_spkrDataDecoded, _micData, _nSamplesSpkr, _nSamplesMic);
            } else {
                // Record mic only while leaving a message
                _ca->recAudio.recData (_micData,_nSamplesMic);
            }

            // Let's wait for the next transmit cycle
            Thread::sleep (TimerPort::getTimer());

            // TimerPort::incTimer(20); // 'frameSize' ms
            TimerPort::incTimer (threadSleep);
        }

        _debug ("Left RTP main loop for callid %s\n",_ca->getCallId().c_str());
    }
    
}
#endif // __SFL_AUDIO_RTP_SESSION_H__