diff --git a/sflphone-common/src/audio/delaydetection.cpp b/sflphone-common/src/audio/delaydetection.cpp index 03fbf32eaf53eb289ce01b6d3e4707ddb7b50681..08535d9b52c52260bd09039d5c1b7785798e6f3d 100644 --- a/sflphone-common/src/audio/delaydetection.cpp +++ b/sflphone-common/src/audio/delaydetection.cpp @@ -32,22 +32,38 @@ #include "delaydetection.h" #include "math.h" - +// #include <stdio.h> +#include <string.h> // decimation filter coefficient -float decimationCoefs[] = {0.1, 0.1, 0.1, 0.1, 0.1}; +float decimationCoefs[] = {-0.09870257, 0.07473655, 0.05616626, 0.04448337, 0.03630817, 0.02944626, + 0.02244098, 0.01463477, 0.00610982, -0.00266367, -0.01120109, -0.01873722, + -0.02373243, -0.02602213, -0.02437806, -0.01869834, -0.00875287, 0.00500204, + 0.02183252, 0.04065763, 0.06015944, 0.0788299, 0.09518543, 0.10799179, + 0.1160644, 0.12889288, 0.1160644, 0.10799179, 0.09518543, 0.0788299, + 0.06015944, 0.04065763, 0.02183252, 0.00500204, -0.00875287, -0.01869834, + -0.02437806, -0.02602213, -0.02373243, -0.01873722, -0.01120109, -0.00266367, + 0.00610982, 0.01463477, 0.02244098, 0.02944626, 0.03630817, 0.04448337, + 0.05616626, 0.07473655, -0.09870257}; std::vector<double> ird(decimationCoefs, decimationCoefs + sizeof(decimationCoefs)/sizeof(float)); // decimation filter coefficient -float bandpassCoefs[] = {0.1, 0.1, 0.1, 0.1, 0.1}; +float bandpassCoefs[] = {0.06278034, -0.0758545, -0.02274943, -0.0084497, 0.0702427, 0.05986113, + 0.06436469, -0.02412049, -0.03433526, -0.07568665, -0.03214543, -0.07236507, + -0.06979052, -0.12446371, -0.05530828, 0.00947243, 0.15294699, 0.17735563, + 0.15294699, 0.00947243, -0.05530828, -0.12446371, -0.06979052, -0.07236507, + -0.03214543, -0.07568665, -0.03433526, -0.02412049, 0.06436469, 0.05986113, + 0.0702427, -0.0084497, -0.02274943, -0.0758545, 0.06278034}; std::vector<double> irb(bandpassCoefs, bandpassCoefs + sizeof(bandpassCoefs)/sizeof(float)); -FirFilter::FirFilter(std::vector<double> ir) : _impulseResponse(ir), - _length(ir.size()), +FirFilter::FirFilter(std::vector<double> ir) : _length(ir.size()), + _impulseResponse(ir), _count(0) -{} +{ + memset(_taps, 0, sizeof(double)*MAXFILTERSIZE); +} FirFilter::~FirFilter() {} @@ -68,65 +84,154 @@ float FirFilter::getOutputSample(float inputSample) return result; } +void FirFilter::reset(void) +{ + for(int i = 0; i < _length; i++) { + _impulseResponse[i] = 0.0; + } +} -DelayDetection::DelayDetection() : _decimationFilter(ird), _bandpassFilter(irb) {} + +DelayDetection::DelayDetection() : _internalState(WaitForSpeaker), _decimationFilter(ird), _bandpassFilter(irb), _segmentSize(DELAY_BUFF_SIZE), _downsamplingFactor(8) +{ + _micDownSize = WINDOW_SIZE / _downsamplingFactor; + _spkrDownSize = DELAY_BUFF_SIZE / _downsamplingFactor; + + memset(_spkrReference, 0, sizeof(float)*WINDOW_SIZE*2); + memset(_capturedData, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + memset(_spkrReferenceDown, 0, sizeof(float)*WINDOW_SIZE*2); + memset(_captureDataDown, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + memset(_spkrReferenceFilter, 0, sizeof(float)*WINDOW_SIZE*2); + memset(_captureDataFilter, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + memset(_correlationResult, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + +} DelayDetection::~DelayDetection(){} -void DelayDetection::reset() {} +void DelayDetection::reset() +{ + _nbMicSampleStored = 0; + _nbSpkrSampleStored = 0; + + _decimationFilter.reset(); + _bandpassFilter.reset(); + + memset(_spkrReference, 0, sizeof(float)*WINDOW_SIZE*2); + memset(_capturedData, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + memset(_spkrReferenceDown, 0, sizeof(float)*WINDOW_SIZE*2); + memset(_captureDataDown, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + memset(_spkrReferenceFilter, 0, sizeof(float)*WINDOW_SIZE*2); + memset(_captureDataFilter, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + memset(_correlationResult, 0, sizeof(float)*DELAY_BUFF_SIZE*2); + + _internalState = WaitForSpeaker; +} void DelayDetection::putData(SFLDataFormat *inputData, int nbBytes) { + + // Machine may already got a spkr and is waiting for mic or computing correlation + if(_nbSpkrSampleStored == WINDOW_SIZE) + return; + int nbSamples = nbBytes/sizeof(SFLDataFormat); + + if((_nbSpkrSampleStored + nbSamples) > WINDOW_SIZE) + nbSamples = WINDOW_SIZE - _nbSpkrSampleStored; + + + if (nbSamples) { + + float tmp[nbSamples]; + float down[nbSamples]; + + convertInt16ToFloat32(inputData, tmp, nbSamples); + memcpy(_spkrReference+_nbSpkrSampleStored, tmp, nbSamples*sizeof(float)); + + downsampleData(tmp, down, nbSamples, _downsamplingFactor); + bandpassFilter(down, nbSamples/_downsamplingFactor); + memcpy(_spkrReferenceDown+(_nbSpkrSampleStored/_downsamplingFactor), down, (nbSamples/_downsamplingFactor)*sizeof(float)); - float tmp[nbSamples]; - float down[nbSamples]; + _nbSpkrSampleStored += nbSamples; + + } + + // Update the state + _internalState = WaitForMic; - convertInt16ToFloat32(inputData, tmp, nbSamples); - downsampleData(tmp, down, nbSamples, 8); - bandpassFilter(down, nbSamples/8); } int DelayDetection::getData(SFLDataFormat *outputData) { return 0; } void DelayDetection::process(SFLDataFormat *inputData, int nbBytes) { + if(_internalState != WaitForMic) + return; + int nbSamples = nbBytes/sizeof(SFLDataFormat); - float tmp[nbSamples]; - float down[nbSamples]; + if((_nbMicSampleStored + nbSamples) > DELAY_BUFF_SIZE) + nbSamples = DELAY_BUFF_SIZE - _nbMicSampleStored; + + if(nbSamples) { + float tmp[nbSamples]; + float down[nbSamples]; + + convertInt16ToFloat32(inputData, tmp, nbSamples); + memcpy(_capturedData+_nbMicSampleStored, tmp, nbSamples); + + downsampleData(tmp, down, nbSamples, _downsamplingFactor); + bandpassFilter(down, nbSamples/_downsamplingFactor); + memcpy(_capturedData+(_nbMicSampleStored/_downsamplingFactor), tmp, nbSamples); + + _nbMicSampleStored += nbSamples; + + } + + if(_nbMicSampleStored == DELAY_BUFF_SIZE) + _internalState = ComputeCorrelation; + else + return; + + _debug("_spkrDownSize: %d, _micDownSize: %d", _spkrDownSize, _micDownSize); + crossCorrelate(_spkrReferenceDown, _captureDataDown, _correlationResult, _micDownSize, _spkrDownSize); + + int maxIndex = getMaxIndex(_correlationResult, _spkrDownSize); - convertInt16ToFloat32(inputData, tmp, nbSamples); - downsampleData(tmp, down, nbSamples, 8); - bandpassFilter(down, nbSamples/8); + _debug("MaxIndex: %d", maxIndex); + + reset(); } int DelayDetection::process(SFLDataFormat *intputData, SFLDataFormat *outputData, int nbBytes) { return 0; } void DelayDetection::process(SFLDataFormat *micData, SFLDataFormat *spkrData, SFLDataFormat *outputData, int nbBytes) {} -void DelayDetection::crossCorrelate(double *ref, double *seg, double *res, short refSize, short segSize) { +void DelayDetection::crossCorrelate(float *ref, float *seg, float *res, int refSize, int segSize) { int counter = 0; // Output has same size as the - short rsize = refSize; - short ssize = segSize; - short tmpsize = segSize-refSize+1; + int rsize = refSize; + int ssize = segSize; + int tmpsize = segSize-refSize+1; // perform autocorrelation on reference signal - double acref = correlate(ref, ref, rsize); - + float acref = correlate(ref, ref, rsize); + _debug("acref: %f", acref); + // perform crossrelation on signal - double acseg = 0.0; - double r; + float acseg = 0.0; + float r; while(--tmpsize) { --ssize; acseg = correlate(seg+tmpsize, seg+tmpsize, rsize); + _debug("acseg: %f", acseg); res[ssize] = correlate(ref, seg+tmpsize, rsize); r = sqrt(acref*acseg); - if(r < 0.0001) + if(r < 0.0000001) res[ssize] = 0.0; else res[ssize] = res[ssize] / r; @@ -150,7 +255,7 @@ void DelayDetection::crossCorrelate(double *ref, double *seg, double *res, short } } -double DelayDetection::correlate(double *sig1, double *sig2, short size) { +double DelayDetection::correlate(float *sig1, float *sig2, short size) { short s = size; @@ -199,3 +304,19 @@ void DelayDetection::bandpassFilter(float *input, int nbSamples) { input[i] = _bandpassFilter.getOutputSample(input[i]); } } + + +int DelayDetection::getMaxIndex(float *data, int size) { + + float max = 0.0; + int k; + + for(int i = 0; i < size; i++) { + if(data[i] >= max) { + max = data[i]; + k = i; + } + } + + return k; +} diff --git a/sflphone-common/src/audio/delaydetection.h b/sflphone-common/src/audio/delaydetection.h index 3919fc7ba5ca76663da187079f558e6f7202cac4..a7567b6c2e8ccdfb82301724c5f151548517a8f2 100644 --- a/sflphone-common/src/audio/delaydetection.h +++ b/sflphone-common/src/audio/delaydetection.h @@ -41,10 +41,12 @@ #define MAX_DELAY 150 // Size of internal buffers in samples -#define DELAY_BUFF_SIZE 150*8000/1000 +#define DELAY_BUFF_SIZE MAX_DELAY*8000/1000 #define MAXFILTERSIZE 100 + + class FirFilter { public: @@ -64,6 +66,8 @@ class FirFilter { */ float getOutputSample(float inputSample); + void reset(void); + private: @@ -112,15 +116,22 @@ class DelayDetection : public Algorithm { private: + enum State { + WaitForSpeaker, + WaitForMic, + ComputeCorrelation + }; + + /** * Perform a normalized crosscorrelation between template and segment */ - void crossCorrelate(double *ref, double *seg, double *res, short refSize, short segSize); + void crossCorrelate(float *ref, float *seg, float *res, int refSize, int segSize); /** * Perform a correlation on specified signals (mac) */ - double correlate(double *sig1, double *sig2, short size); + double correlate(float *sig1, float *sig2, short size); void convertInt16ToFloat32(SFLDataFormat *input, float *ouput, int nbSamples); @@ -128,36 +139,49 @@ class DelayDetection : public Algorithm { void bandpassFilter(float *input, int nbSamples); + int getMaxIndex(float *data, int size); + + State _internalState; + + FirFilter _decimationFilter; + + FirFilter _bandpassFilter; + /** * Segment size in samples for correlation */ short _segmentSize; + int _downsamplingFactor; + /** * Resulting correlation size (s + w -1) */ short _correlationSize; - float _spkrReference[DELAY_BUFF_SIZE]; + float _spkrReference[WINDOW_SIZE*2]; - float _capturedData[DELAY_BUFF_SIZE]; + float _capturedData[DELAY_BUFF_SIZE*2]; - float _spkrReferenceDown[DELAY_BUFF_SIZE]; + float _spkrReferenceDown[WINDOW_SIZE*2]; - float _captureDataDown[DELAY_BUFF_SIZE]; + float _captureDataDown[DELAY_BUFF_SIZE*2]; - float _spkrReferenceFilter[DELAY_BUFF_SIZE]; + float _spkrReferenceFilter[WINDOW_SIZE*2]; - float _captureDataFilter[DELAY_BUFF_SIZE]; + float _captureDataFilter[DELAY_BUFF_SIZE*2]; - // int myints[] = {16,2,77,29}; - // vector<int> fifth (myints, myints + sizeof(myints) / sizeof(int) ); + float _correlationResult[DELAY_BUFF_SIZE*2]; - FirFilter _decimationFilter; + int _remainingIndex; - FirFilter _bandpassFilter; + int _spkrDownSize; - int _remainingIndex; + int _micDownSize; + + int _nbMicSampleStored; + + int _nbSpkrSampleStored; public: diff --git a/sflphone-common/src/audio/echocancel.cpp b/sflphone-common/src/audio/echocancel.cpp index f603899d69c2e6927ac8504af35444b708a21b5d..e3a84103d0bb89beea8b19a4776a390576bc332e 100644 --- a/sflphone-common/src/audio/echocancel.cpp +++ b/sflphone-common/src/audio/echocancel.cpp @@ -213,6 +213,8 @@ void EchoCancel::reset() void EchoCancel::putData(SFLDataFormat *inputData, int nbBytes) { + _delayDetector.putData(inputData, nbBytes); + if(_spkrStoped) { _debug("EchoCancel: Flush data"); _micData->flushAll(); @@ -244,6 +246,8 @@ void EchoCancel::process(SFLDataFormat *data, int nbBytes) {} int EchoCancel::process(SFLDataFormat *inputData, SFLDataFormat *outputData, int nbBytes) { + _delayDetector.process(inputData, nbBytes); + if(_spkrStoped) { bcopy(inputData, outputData, nbBytes); return nbBytes; diff --git a/sflphone-common/src/audio/echocancel.h b/sflphone-common/src/audio/echocancel.h index 2b4cba1e6f70e52519f26c67a1c54caf6ebd23e7..f5725d9141c1b4d097b02c8ee09b5288d879ffe3 100644 --- a/sflphone-common/src/audio/echocancel.h +++ b/sflphone-common/src/audio/echocancel.h @@ -36,6 +36,7 @@ #include "audioprocessing.h" #include "ringbuffer.h" +#include "delaydetection.h" // Number of ms in sec #define MS_PER_SEC 1000 @@ -378,7 +379,9 @@ class EchoCancel : public Algorithm { /** * true if noise suppressor is active, false elsewhere */ - bool _noiseActive; + bool _noiseActive; + + DelayDetection _delayDetector; }; diff --git a/sflphone-common/test/delaydetectiontest.cpp b/sflphone-common/test/delaydetectiontest.cpp index 1b523db7c6b7c983b6973bc97b0ae91dea914a3e..61771fccc04776c772c2f0c0a5f4590007d3d8f3 100644 --- a/sflphone-common/test/delaydetectiontest.cpp +++ b/sflphone-common/test/delaydetectiontest.cpp @@ -33,6 +33,7 @@ #include <iostream> #include <math.h> +#include <string.h> void DelayDetectionTest::setUp() {} @@ -40,20 +41,18 @@ void DelayDetectionTest::tearDown() {} void DelayDetectionTest::testCrossCorrelation() { - double signal[10] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; - double ref[3] = {0.0, 1.0, 2.0}; + float signal[10] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; + float ref[3] = {0.0, 1.0, 2.0}; - double result[10]; - double expected[10] = {0.0, 0.89442719, 1.0, 0.95618289, 0.91350028, 0.88543774, 0.86640023, 0.85280287, 0.8426548, 0.83480969}; + float result[10]; + float expected[10] = {0.0, 0.89442719, 1.0, 0.95618289, 0.91350028, 0.88543774, 0.86640023, 0.85280287, 0.8426548, 0.83480969}; CPPUNIT_ASSERT(_delaydetect.correlate(ref, ref, 3) == 5.0); CPPUNIT_ASSERT(_delaydetect.correlate(signal, signal, 10) == 285.0); _delaydetect.crossCorrelate(ref, signal, result, 3, 10); - std::cout << std::endl; - - double tmp; + float tmp; for (int i = 0; i < 10; i++) { tmp = result[i]-expected[i]; if(tmp < 0.0) @@ -61,7 +60,97 @@ void DelayDetectionTest::testCrossCorrelation() else CPPUNIT_ASSERT(tmp < 0.001); } +} + +void DelayDetectionTest::testCrossCorrelationDelay() +{ + float signal[10] = {0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0}; + float ref[3] = {0.0, 1.0, 0.0}; + float result[10]; + _delaydetect.crossCorrelate(ref, signal, result, 3, 10); + + float expected[10] = {0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0}; + } +void DelayDetectionTest::testFirFilter() +{ + float decimationCoefs[] = {-0.09870257, 0.07473655, 0.05616626, 0.04448337, 0.03630817, 0.02944626, + 0.02244098, 0.01463477, 0.00610982, -0.00266367, -0.01120109, -0.01873722, + -0.02373243, -0.02602213, -0.02437806, -0.01869834, -0.00875287, 0.00500204, + 0.02183252, 0.04065763, 0.06015944, 0.0788299, 0.09518543, 0.10799179, + 0.1160644, 0.12889288, 0.1160644, 0.10799179, 0.09518543, 0.0788299, + 0.06015944, 0.04065763, 0.02183252, 0.00500204, -0.00875287, -0.01869834, + -0.02437806, -0.02602213, -0.02373243, -0.01873722, -0.01120109, -0.00266367, + 0.00610982, 0.01463477, 0.02244098, 0.02944626, 0.03630817, 0.04448337, + 0.05616626, 0.07473655, -0.09870257}; + std::vector<double> ird(decimationCoefs, decimationCoefs + sizeof(decimationCoefs)/sizeof(float)); + + float bandpassCoefs[] = {0.06278034, -0.0758545, -0.02274943, -0.0084497, 0.0702427, 0.05986113, + 0.06436469, -0.02412049, -0.03433526, -0.07568665, -0.03214543, -0.07236507, + -0.06979052, -0.12446371, -0.05530828, 0.00947243, 0.15294699, 0.17735563, + 0.15294699, 0.00947243, -0.05530828, -0.12446371, -0.06979052, -0.07236507, + -0.03214543, -0.07568665, -0.03433526, -0.02412049, 0.06436469, 0.05986113, + 0.0702427, -0.0084497, -0.02274943, -0.0758545, 0.06278034}; + std::vector<double> irb(bandpassCoefs, bandpassCoefs + sizeof(bandpassCoefs)/sizeof(float)); + + float impulse[100]; + memset(impulse, 0, sizeof(float)*100); + impulse[0] = 1.0; + + FirFilter _decimationFilter(ird); + FirFilter _bandpassFilter(irb); + + float impulseresponse[100]; + memset(impulseresponse, 0, sizeof(float)*100); + + // compute impulse response + for(int i = 0; i < 100; i++) { + impulseresponse[i] = _decimationFilter.getOutputSample(impulse[i]); + } + + float tmp; + int size = sizeof(decimationCoefs)/sizeof(float); + for(int i = 0; i < size; i++) { + tmp = decimationCoefs[i] - impulseresponse[i]; + if(tmp < 0.0) + CPPUNIT_ASSERT (tmp > -0.000001); + else + CPPUNIT_ASSERT(tmp < 0.000001); + } + + + for(int i = 0; i < 100; i++) { + impulseresponse[i] = _bandpassFilter.getOutputSample(impulse[i]); + } + + size = sizeof(bandpassCoefs)/sizeof(float); + for(int i = 0; i < size; i++) { + tmp = bandpassCoefs[i] - impulseresponse[i]; + if(tmp < 0.0) + CPPUNIT_ASSERT (tmp > -0.000001); + else + CPPUNIT_ASSERT(tmp < 0.000001); + } + +} + + + +void DelayDetectionTest::testDelayDetection() { + + int delay = 3; + + SFLDataFormat spkr[WINDOW_SIZE]; + memset(spkr, 0, sizeof(SFLDataFormat)*WINDOW_SIZE); + spkr[0] = 32000; + + SFLDataFormat mic[DELAY_BUFF_SIZE]; + memset(mic, 0, sizeof(SFLDataFormat)*DELAY_BUFF_SIZE); + mic[delay] = 32000; + + _delaydetect.putData(spkr, WINDOW_SIZE*sizeof(SFLDataFormat)); + _delaydetect.process(mic, DELAY_BUFF_SIZE*sizeof(SFLDataFormat)); +} diff --git a/sflphone-common/test/delaydetectiontest.h b/sflphone-common/test/delaydetectiontest.h index 7de2dd3d32d75dd9557b4bb67c5160d1e80b60f8..cab6333edf61fe1c2466e5dacf4bd431d251b812 100644 --- a/sflphone-common/test/delaydetectiontest.h +++ b/sflphone-common/test/delaydetectiontest.h @@ -72,6 +72,9 @@ class DelayDetectionTest : public CppUnit::TestCase { CPPUNIT_TEST_SUITE( DelayDetectionTest ); CPPUNIT_TEST( testCrossCorrelation ); + CPPUNIT_TEST( testCrossCorrelationDelay ); + CPPUNIT_TEST( testFirFilter ); + CPPUNIT_TEST( testDelayDetection ); CPPUNIT_TEST_SUITE_END(); public: @@ -84,6 +87,12 @@ class DelayDetectionTest : public CppUnit::TestCase { void testCrossCorrelation(); + void testCrossCorrelationDelay(); + + void testFirFilter(); + + void testDelayDetection(); + private: DelayDetection _delaydetect;