Commit a20de01f authored by Alexandre Savard's avatar Alexandre Savard
Browse files

[#3481] Improve power estimation for speech detection

parent 6a652e93
...@@ -39,6 +39,9 @@ EchoCancel::EchoCancel(int smplRate, int frameLength) : _samplingRate(smplRate), ...@@ -39,6 +39,9 @@ EchoCancel::EchoCancel(int smplRate, int frameLength) : _samplingRate(smplRate),
_nbSegmentPerFrame(0), _nbSegmentPerFrame(0),
_micHistoryLength(0), _micHistoryLength(0),
_spkrHistoryLength(0), _spkrHistoryLength(0),
_alpha(0.01),
_spkrLevelMem(0),
_micLevelMem(0),
_spkrLevel(0), _spkrLevel(0),
_micLevel(0), _micLevel(0),
_spkrHistCnt(0), _spkrHistCnt(0),
...@@ -61,14 +64,14 @@ EchoCancel::EchoCancel(int smplRate, int frameLength) : _samplingRate(smplRate), ...@@ -61,14 +64,14 @@ EchoCancel::EchoCancel(int smplRate, int frameLength) : _samplingRate(smplRate),
{ {
_debug("EchoCancel: Instantiate echo canceller"); _debug("EchoCancel: Instantiate echo canceller");
/*
micFile = new ofstream("micData", ofstream::binary); micFile = new ofstream("micData", ofstream::binary);
echoFile = new ofstream("echoData", ofstream::binary); echoFile = new ofstream("echoData", ofstream::binary);
spkrFile = new ofstream("spkrData", ofstream::binary); spkrFile = new ofstream("spkrData", ofstream::binary);
micLevelData = new ofstream("micLevelData", ofstream::binary); micLevelData = new ofstream("micLevelData", ofstream::binary);
spkrLevelData = new ofstream("spkrLevelData", ofstream::binary); spkrLevelData = new ofstream("spkrLevelData", ofstream::binary);
*/
_micData = new RingBuffer(50000); _micData = new RingBuffer(50000);
_spkrData = new RingBuffer(50000); _spkrData = new RingBuffer(50000);
...@@ -125,7 +128,7 @@ EchoCancel::~EchoCancel() ...@@ -125,7 +128,7 @@ EchoCancel::~EchoCancel()
speex_preprocess_state_destroy(_noiseState); speex_preprocess_state_destroy(_noiseState);
/*
micFile->close(); micFile->close();
spkrFile->close(); spkrFile->close();
echoFile->close(); echoFile->close();
...@@ -138,7 +141,7 @@ EchoCancel::~EchoCancel() ...@@ -138,7 +141,7 @@ EchoCancel::~EchoCancel()
spkrLevelData->close(); spkrLevelData->close();
delete micLevelData; delete micLevelData;
delete spkrLevelData; delete spkrLevelData;
*/
} }
...@@ -263,8 +266,8 @@ int EchoCancel::process(SFLDataFormat *inputData, SFLDataFormat *outputData, int ...@@ -263,8 +266,8 @@ int EchoCancel::process(SFLDataFormat *inputData, SFLDataFormat *outputData, int
_spkrData->Get(_tmpSpkr, byteSize); _spkrData->Get(_tmpSpkr, byteSize);
_micData->Get(_tmpMic, byteSize); _micData->Get(_tmpMic, byteSize);
// micFile->write((const char *)_tmpMic, byteSize); micFile->write((const char *)_tmpMic, byteSize);
// spkrFile->write((const char *)_tmpSpkr, byteSize); spkrFile->write((const char *)_tmpSpkr, byteSize);
// Remove noise // Remove noise
if(_noiseActive) if(_noiseActive)
...@@ -273,7 +276,7 @@ int EchoCancel::process(SFLDataFormat *inputData, SFLDataFormat *outputData, int ...@@ -273,7 +276,7 @@ int EchoCancel::process(SFLDataFormat *inputData, SFLDataFormat *outputData, int
// Processed echo cancellation // Processed echo cancellation
performEchoCancel(_tmpMic, _tmpSpkr, _tmpOut); performEchoCancel(_tmpMic, _tmpSpkr, _tmpOut);
// echoFile->write((const char *)_tmpOut, byteSize); echoFile->write((const char *)_tmpOut, byteSize);
bcopy(_tmpOut, outputData+(nbFrame*_smplPerFrame), byteSize); bcopy(_tmpOut, outputData+(nbFrame*_smplPerFrame), byteSize);
...@@ -306,9 +309,6 @@ void EchoCancel::setSamplingRate(int smplRate) { ...@@ -306,9 +309,6 @@ void EchoCancel::setSamplingRate(int smplRate) {
void EchoCancel::performEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrData, SFLDataFormat *outputData) { void EchoCancel::performEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrData, SFLDataFormat *outputData) {
// int tempmiclevel[_nbSegmentPerFrame];
// int tempspkrlevel[_nbSegmentPerFrame];
for(int k = 0; k < _nbSegmentPerFrame; k++) { for(int k = 0; k < _nbSegmentPerFrame; k++) {
updateEchoCancel(micData+(k*_smplPerSeg), spkrData+(k*_smplPerSeg)); updateEchoCancel(micData+(k*_smplPerSeg), spkrData+(k*_smplPerSeg));
...@@ -319,9 +319,6 @@ void EchoCancel::performEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDa ...@@ -319,9 +319,6 @@ void EchoCancel::performEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDa
// _debug("_spkrLevel: (max): %d", _spkrLevel); // _debug("_spkrLevel: (max): %d", _spkrLevel);
// _debug("_micLevel: (min): %d", _micLevel); // _debug("_micLevel: (min): %d", _micLevel);
// tempspkrlevel[k] = _spkrLevel;
// tempmiclevel[k] = _micLevel;
if(_spkrLevel >= MIN_SIG_LEVEL) { if(_spkrLevel >= MIN_SIG_LEVEL) {
if(_micLevel > _spkrLevel) { if(_micLevel > _spkrLevel) {
increaseFactor(0.2); increaseFactor(0.2);
...@@ -346,18 +343,21 @@ void EchoCancel::performEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDa ...@@ -346,18 +343,21 @@ void EchoCancel::performEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDa
} }
// micLevelData->write((const char *)tempmiclevel, sizeof(int)*_nbSegmentPerFrame);
// spkrLevelData->write((const char *)tempspkrlevel, sizeof(int)*_nbSegmentPerFrame);
} }
void EchoCancel::updateEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrData) { void EchoCancel::updateEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrData) {
// TODO: we should find a way to normalize signal at this point // TODO: we should find a way to normalize signal at this point
int micLvl = computeAmplitudeLevel(micData, _smplPerSeg) / 6; int micLvl = computeAmplitudeLevel(micData, _smplPerSeg);
int spkrLvl = computeAmplitudeLevel(spkrData, _smplPerSeg); int spkrLvl = computeAmplitudeLevel(spkrData, _smplPerSeg);
SFLDataFormat tempSpkrLevel[_smplPerSeg];
SFLDataFormat tempMicLevel[_smplPerSeg];
_spkrLevelMem = estimatePower(spkrData, tempSpkrLevel, _smplPerSeg, _spkrLevelMem);
_micLevelMem = estimatePower(micData, tempMicLevel, _smplPerSeg, _micLevelMem);
// Add 1 to make sure we are not dividing by 0 // Add 1 to make sure we are not dividing by 0
_avgMicLevelHist[_micHistCnt++] = micLvl+1; _avgMicLevelHist[_micHistCnt++] = micLvl+1;
_avgSpkrLevelHist[_spkrHistCnt++] = spkrLvl+1; _avgSpkrLevelHist[_spkrHistCnt++] = spkrLvl+1;
...@@ -370,7 +370,10 @@ void EchoCancel::updateEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDat ...@@ -370,7 +370,10 @@ void EchoCancel::updateEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDat
if(_spkrHistCnt >= _spkrHistoryLength) if(_spkrHistCnt >= _spkrHistoryLength)
_spkrHistCnt = 0; _spkrHistCnt = 0;
micLevelData->write((const char*)tempMicLevel, sizeof(SFLDataFormat)*_smplPerSeg);
spkrLevelData->write((const char*)tempSpkrLevel, sizeof(SFLDataFormat)*_smplPerSeg);
/* /*
// if adaptation done, stop here // if adaptation done, stop here
// if(_adaptDone) // if(_adaptDone)
...@@ -412,7 +415,7 @@ void EchoCancel::updateEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDat ...@@ -412,7 +415,7 @@ void EchoCancel::updateEchoCancel(SFLDataFormat *micData, SFLDataFormat *spkrDat
int EchoCancel::computeAmplitudeLevel(SFLDataFormat *data, int size) { int EchoCancel::computeAmplitudeLevel(SFLDataFormat *data, int size) {
int level = 0; int level = 0;
for(int i = 0; i < size; i++) { for(int i = 0; i < size; i++) {
...@@ -425,6 +428,21 @@ int EchoCancel::computeAmplitudeLevel(SFLDataFormat *data, int size) { ...@@ -425,6 +428,21 @@ int EchoCancel::computeAmplitudeLevel(SFLDataFormat *data, int size) {
level = level / _smplPerSeg; level = level / _smplPerSeg;
return level; return level;
}
/**
 * Track the level of a signal with a first-order recursive average of the
 * absolute sample values:
 *
 *     level = (1 - _alpha) * level + _alpha * |data[i]|
 *
 * The running level is converted back to SFLDataFormat after every sample,
 * exactly as it is stored in the output buffer and in the returned memory.
 *
 * @param data  input samples; `size` entries are read
 * @param ampl  output buffer; receives the running level after each sample,
 *              `size` entries are written
 * @param size  number of samples to process (nothing is done when <= 0)
 * @param mem   level carried over from the previous call
 * @return the level after the last processed sample (or `mem` unchanged when
 *         no sample was processed), suitable as `mem` for the next call
 */
SFLDataFormat EchoCancel::estimatePower(SFLDataFormat *data, SFLDataFormat *ampl, int size, SFLDataFormat mem) {

    const float memFactor = 1.0f - _alpha;

    for (int i = 0; i < size; i++) {
        // Rectify in floating point. An unqualified abs() may bind to the
        // integer overload depending on the headers in scope, which would
        // silently drop the fractional part of the scaled sample.
        float rectified = _alpha * static_cast<float>(data[i]);
        if (rectified < 0.0f)
            rectified = -rectified;

        mem = static_cast<SFLDataFormat>(memFactor * static_cast<float>(mem) + rectified);
        ampl[i] = mem;
    }

    return mem;
}
......
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define MIC_LENGTH 80 #define MIC_LENGTH 80
// Voice level threshold // Voice level threshold
#define MIN_SIG_LEVEL 75 #define MIN_SIG_LEVEL 250
// Delay between mic and speaker // Delay between mic and speaker
// #define DELAY_AMPLIFY 60 // #define DELAY_AMPLIFY 60
...@@ -142,6 +142,11 @@ class EchoCancel : public Algorithm { ...@@ -142,6 +142,11 @@ class EchoCancel : public Algorithm {
*/ */
int computeAmplitudeLevel(SFLDataFormat *data, int size); int computeAmplitudeLevel(SFLDataFormat *data, int size);
/**
* Estimate the signal level with a first-order recursive average of the absolute sample values
*/
SFLDataFormat estimatePower(SFLDataFormat *data, SFLDataFormat *ampl, int size, SFLDataFormat mem);
/** /**
* Return the max amplitude provided any of _avgSpkrLevelHist or _avgMicLevelHist * Return the max amplitude provided any of _avgSpkrLevelHist or _avgMicLevelHist
*/ */
...@@ -289,6 +294,11 @@ class EchoCancel : public Algorithm { ...@@ -289,6 +294,11 @@ class EchoCancel : public Algorithm {
*/ */
int _adaptCnt; int _adaptCnt;
float _alpha;
SFLDataFormat _spkrLevelMem;
SFLDataFormat _micLevelMem;
int _spkrAdaptCnt; int _spkrAdaptCnt;
int _micAdaptCnt; int _micAdaptCnt;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment