diff --git a/media/libsoundtouch/moz.yaml b/media/libsoundtouch/moz.yaml index b03bf306db58..b566f0d50fd0 100644 --- a/media/libsoundtouch/moz.yaml +++ b/media/libsoundtouch/moz.yaml @@ -17,4 +17,4 @@ origin: license: "LGPL-2.1" # From https://gitlab.com/soundtouch/soundtouch - release: v2.1.2 + release: "a911a1e98689e763e1274f462c00e7c6fedd80f9 (2020-10-03 16:58:00 +0300)" diff --git a/media/libsoundtouch/src/FIFOSampleBuffer.cpp b/media/libsoundtouch/src/FIFOSampleBuffer.cpp index 5206746c0b22..ad368754667f 100644 --- a/media/libsoundtouch/src/FIFOSampleBuffer.cpp +++ b/media/libsoundtouch/src/FIFOSampleBuffer.cpp @@ -265,3 +265,11 @@ uint FIFOSampleBuffer::adjustAmountOfSamples(uint numSamples) } return samplesInBuffer; } + + +/// Add silence to end of buffer +void FIFOSampleBuffer::addSilent(uint nSamples) +{ + memset(ptrEnd(nSamples), 0, sizeof(SAMPLETYPE) * nSamples * channels); + samplesInBuffer += nSamples; +} diff --git a/media/libsoundtouch/src/FIFOSampleBuffer.h b/media/libsoundtouch/src/FIFOSampleBuffer.h index f7623f2a88c4..537a7b87229c 100644 --- a/media/libsoundtouch/src/FIFOSampleBuffer.h +++ b/media/libsoundtouch/src/FIFOSampleBuffer.h @@ -170,6 +170,9 @@ public: /// allow trimming (downwards) amount of samples in pipeline. /// Returns adjusted amount of samples uint adjustAmountOfSamples(uint numSamples); + + /// Add silence to end of buffer + void addSilent(uint nSamples); }; } diff --git a/media/libsoundtouch/src/FIRFilter.cpp b/media/libsoundtouch/src/FIRFilter.cpp index 11af395c7e71..9a72ea52af91 100644 --- a/media/libsoundtouch/src/FIRFilter.cpp +++ b/media/libsoundtouch/src/FIRFilter.cpp @@ -96,17 +96,10 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui suml = sumr = 0; ptr = src + j; - for (i = 0; i < length; i += 4) + for (i = 0; i < length; i ++) { - // loop is unrolled by factor of 4 here for efficiency - suml += ptr[2 * i + 0] * filterCoeffs[i + 0] + - ptr[2 * i + 2] * filterCoeffs[i + 1] + - ptr[2 * i + 4] * filterCoeffs[i + 2] + - ptr[2 * i + 6] * filterCoeffs[i + 3]; - sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] + - ptr[2 * i + 3] * filterCoeffs[i + 1] + - ptr[2 * i + 5] * filterCoeffs[i + 2] + - ptr[2 * i + 7] * filterCoeffs[i + 3]; + suml += ptr[2 * i] * filterCoeffs[i]; + sumr += ptr[2 * i + 1] * filterCoeffs[i]; } #ifdef SOUNDTOUCH_INTEGER_SAMPLES @@ -148,13 +141,9 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint uint i; sum = 0; - for (i = 0; i < length; i += 4) + for (i = 0; i < length; i ++) { - // loop is unrolled by factor of 4 here for efficiency - sum += pSrc[i + 0] * filterCoeffs[i + 0] + - pSrc[i + 1] * filterCoeffs[i + 1] + - pSrc[i + 2] * filterCoeffs[i + 2] + - pSrc[i + 3] * filterCoeffs[i + 3]; + sum += pSrc[i] * filterCoeffs[i]; } #ifdef SOUNDTOUCH_INTEGER_SAMPLES sum >>= resultDivFactor; diff --git a/media/libsoundtouch/src/InterpolateCubic.h b/media/libsoundtouch/src/InterpolateCubic.h index 7f84f280248b..9749bcd1ce40 100644 --- a/media/libsoundtouch/src/InterpolateCubic.h +++ b/media/libsoundtouch/src/InterpolateCubic.h @@ -56,6 +56,11 @@ protected: public: InterpolateCubic(); + + int getLatency() const + { + return 1; + } }; } diff --git a/media/libsoundtouch/src/InterpolateLinear.h b/media/libsoundtouch/src/InterpolateLinear.h index 087dc2aecf51..ffc11bd80e97 100644 --- a/media/libsoundtouch/src/InterpolateLinear.h +++ b/media/libsoundtouch/src/InterpolateLinear.h @@ -60,6 +60,11 @@ public: /// Sets new target rate. Normal rate = 1.0, smaller values represent slower /// rate, larger faster rates. virtual void setRate(double newRate); + + int getLatency() const + { + return 0; + } }; @@ -81,6 +86,11 @@ protected: public: InterpolateLinearFloat(); + + int getLatency() const + { + return 0; + } }; } diff --git a/media/libsoundtouch/src/InterpolateShannon.h b/media/libsoundtouch/src/InterpolateShannon.h index 2a59a84f191b..794e755108fb 100644 --- a/media/libsoundtouch/src/InterpolateShannon.h +++ b/media/libsoundtouch/src/InterpolateShannon.h @@ -61,6 +61,11 @@ protected: public: InterpolateShannon(); + + int getLatency() const + { + return 3; + } }; } diff --git a/media/libsoundtouch/src/RateTransposer.cpp b/media/libsoundtouch/src/RateTransposer.cpp index 24d8f36b34dc..30a91d433222 100644 --- a/media/libsoundtouch/src/RateTransposer.cpp +++ b/media/libsoundtouch/src/RateTransposer.cpp @@ -61,6 +61,7 @@ RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer) // Instantiates the anti-alias filter pAAFilter = new AAFilter(64); pTransposer = TransposerBase::newInstance(); + clear(); } @@ -192,6 +193,10 @@ void RateTransposer::clear() outputBuffer.clear(); midBuffer.clear(); inputBuffer.clear(); + + // prefill buffer to avoid losing first samples at beginning of stream + int prefill = getLatency(); + inputBuffer.addSilent(prefill); } @@ -209,7 +214,8 @@ int RateTransposer::isEmpty() const /// Return approximate initial input-output latency int RateTransposer::getLatency() const { - return (bUseAAFilter) ? pAAFilter->getLength() : 0; + return pTransposer->getLatency() + + ((bUseAAFilter) ? (pAAFilter->getLength() / 2) : 0); } diff --git a/media/libsoundtouch/src/RateTransposer.h b/media/libsoundtouch/src/RateTransposer.h index 5f31d1a6dde2..45e79cf33598 100644 --- a/media/libsoundtouch/src/RateTransposer.h +++ b/media/libsoundtouch/src/RateTransposer.h @@ -83,6 +83,7 @@ public: virtual int transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src); virtual void setRate(double newRate); virtual void setChannels(int channels); + virtual int getLatency() const = 0; // static factory function static TransposerBase *newInstance(); diff --git a/media/libsoundtouch/src/TDStretch.cpp b/media/libsoundtouch/src/TDStretch.cpp index 4abb574d046e..c19c2df42687 100644 --- a/media/libsoundtouch/src/TDStretch.cpp +++ b/media/libsoundtouch/src/TDStretch.cpp @@ -1,4 +1,4 @@ -//////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// /// /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo /// while maintaining the original pitch by using a time domain WSOLA-like @@ -54,6 +54,10 @@ using namespace soundtouch; #define max(x, y) (((x) > (y)) ? (x) : (y)) +#if defined(SOUNDTOUCH_USE_NEON) && defined(SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION) + // SIMD mode, allow shortcuts to avoid operations that aren't aligned to 16-byte boundary + #define ST_SIMD_AVOID_UNALIGNED +#endif /***************************************************************************** * @@ -315,9 +319,10 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) { double corr; // Calculates correlation value for the mixing position corresponding to 'i' -#ifdef _OPENMP +#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED) // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't // iterate the loop in sequential order + // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm); #else // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same @@ -832,21 +837,19 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' // version of the routine. -void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const +void TDStretch::overlapMulti(short *poutput, const short *input) const { - SAMPLETYPE m1=(SAMPLETYPE)0; - SAMPLETYPE m2; - int i=0; + short m1; + int i = 0; - for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --) + for (m1 = 0; m1 < overlapLength; m1 ++) { + short m2 = (short)(overlapLength - m1); for (int c = 0; c < channels; c ++) { poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; i++; } - - m1++; } } @@ -891,20 +894,20 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do unsigned long lnorm; int i; + #ifdef ST_SIMD_AVOID_UNALIGNED + // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary + if (((ulongptr)mixingPos) & 15) return -1e50; + #endif + corr = lnorm = 0; - // Same routine for stereo and mono. For stereo, unroll loop for better - // efficiency and gives slightly better resolution against rounding. - // For mono it same routine, just unrolls loop by factor of 4 - for (i = 0; i < channels * overlapLength; i += 4) + // Same routine for stereo and mono + for (i = 0; i < channels * overlapLength; i += 2) { corr += (mixingPos[i] * compare[i] + - mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow - corr += (mixingPos[i + 2] * compare[i + 2] + - mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm; + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; lnorm += (mixingPos[i] * mixingPos[i] + - mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow - lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + - mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm; + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; + // do intermediate scalings to avoid integer overflow } if (lnorm > maxnorm) @@ -926,7 +929,41 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) { - return calcCrossCorr(mixingPos, compare, norm); + long corr; + long lnorm; + int i; + + // cancel first normalizer tap from previous round + lnorm = 0; + for (i = 1; i <= channels; i ++) + { + lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm; + } + + corr = 0; + // Same routine for stereo and mono. + for (i = 0; i < channels * overlapLength; i += 2) + { + corr += (mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; + } + + // update normalizer with last samples of this round + for (int j = 0; j < channels; j ++) + { + i --; + lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm; + } + + norm += (double)lnorm; + if (norm > maxnorm) + { + maxnorm = (unsigned long)norm; + } + + // Normalize result by dividing by sqrt(norm) - this step is easiest + // done using floating point operation + return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); } #endif // SOUNDTOUCH_INTEGER_SAMPLES @@ -1009,27 +1046,21 @@ void TDStretch::calculateOverlapLength(int overlapInMsec) /// Calculate cross-correlation double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) { - double corr; - double norm; + float corr; + float norm; int i; + #ifdef ST_SIMD_AVOID_UNALIGNED + // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary + if (((ulongptr)mixingPos) & 15) return -1e50; + #endif + corr = norm = 0; - // Same routine for stereo and mono. For Stereo, unroll by factor of 2. - // For mono it's same routine yet unrollsd by factor of 4. - for (i = 0; i < channels * overlapLength; i += 4) + // Same routine for stereo and mono + for (i = 0; i < channels * overlapLength; i ++) { - corr += mixingPos[i] * compare[i] + - mixingPos[i + 1] * compare[i + 1]; - - norm += mixingPos[i] * mixingPos[i] + - mixingPos[i + 1] * mixingPos[i + 1]; - - // unroll the loop for better CPU efficiency: - corr += mixingPos[i + 2] * compare[i + 2] + - mixingPos[i + 3] * compare[i + 3]; - - norm += mixingPos[i + 2] * mixingPos[i + 2] + - mixingPos[i + 3] * mixingPos[i + 3]; + corr += mixingPos[i] * compare[i]; + norm += mixingPos[i] * mixingPos[i]; } anorm = norm; @@ -1040,7 +1071,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) { - double corr; + float corr; int i; corr = 0; @@ -1051,14 +1082,10 @@ double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *c norm -= mixingPos[-i] * mixingPos[-i]; } - // Same routine for stereo and mono. For Stereo, unroll by factor of 2. - // For mono it's same routine yet unrollsd by factor of 4. - for (i = 0; i < channels * overlapLength; i += 4) + // Same routine for stereo and mono + for (i = 0; i < channels * overlapLength; i ++) { - corr += mixingPos[i] * compare[i] + - mixingPos[i + 1] * compare[i + 1] + - mixingPos[i + 2] * compare[i + 2] + - mixingPos[i + 3] * compare[i + 3]; + corr += mixingPos[i] * compare[i]; } // update normalizer with last samples of this round