diff --git a/media/libsoundtouch/moz.yaml b/media/libsoundtouch/moz.yaml
index b03bf306db58..b566f0d50fd0 100644
--- a/media/libsoundtouch/moz.yaml
+++ b/media/libsoundtouch/moz.yaml
@@ -17,4 +17,4 @@ origin:
   license: "LGPL-2.1"
 
   # From https://gitlab.com/soundtouch/soundtouch
-  release: v2.1.2
+  release: "a911a1e98689e763e1274f462c00e7c6fedd80f9 (2020-10-03 16:58:00 +0300)"
diff --git a/media/libsoundtouch/src/FIFOSampleBuffer.cpp b/media/libsoundtouch/src/FIFOSampleBuffer.cpp
index 5206746c0b22..ad368754667f 100644
--- a/media/libsoundtouch/src/FIFOSampleBuffer.cpp
+++ b/media/libsoundtouch/src/FIFOSampleBuffer.cpp
@@ -265,3 +265,11 @@ uint FIFOSampleBuffer::adjustAmountOfSamples(uint numSamples)
     }
     return samplesInBuffer;
 }
+
+
+/// Add silence to end of buffer
+void FIFOSampleBuffer::addSilent(uint nSamples)
+{
+    memset(ptrEnd(nSamples), 0, sizeof(SAMPLETYPE) * nSamples * channels);
+    samplesInBuffer += nSamples;
+}
diff --git a/media/libsoundtouch/src/FIFOSampleBuffer.h b/media/libsoundtouch/src/FIFOSampleBuffer.h
index f7623f2a88c4..537a7b87229c 100644
--- a/media/libsoundtouch/src/FIFOSampleBuffer.h
+++ b/media/libsoundtouch/src/FIFOSampleBuffer.h
@@ -170,6 +170,9 @@ public:
     /// allow trimming (downwards) amount of samples in pipeline.
     /// Returns adjusted amount of samples
     uint adjustAmountOfSamples(uint numSamples);
+
+    /// Add silence to end of buffer
+    void addSilent(uint nSamples);
 };
 
 }
diff --git a/media/libsoundtouch/src/FIRFilter.cpp b/media/libsoundtouch/src/FIRFilter.cpp
index 11af395c7e71..9a72ea52af91 100644
--- a/media/libsoundtouch/src/FIRFilter.cpp
+++ b/media/libsoundtouch/src/FIRFilter.cpp
@@ -96,17 +96,10 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
         suml = sumr = 0;
         ptr = src + j;
 
-        for (i = 0; i < length; i += 4) 
+        for (i = 0; i < length; i ++) 
         {
-            // loop is unrolled by factor of 4 here for efficiency
-            suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
-                    ptr[2 * i + 2] * filterCoeffs[i + 1] +
-                    ptr[2 * i + 4] * filterCoeffs[i + 2] +
-                    ptr[2 * i + 6] * filterCoeffs[i + 3];
-            sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
-                    ptr[2 * i + 3] * filterCoeffs[i + 1] +
-                    ptr[2 * i + 5] * filterCoeffs[i + 2] +
-                    ptr[2 * i + 7] * filterCoeffs[i + 3];
+            suml += ptr[2 * i] * filterCoeffs[i];
+            sumr += ptr[2 * i + 1] * filterCoeffs[i];
         }
 
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
@@ -148,13 +141,9 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
         uint i;
 
         sum = 0;
-        for (i = 0; i < length; i += 4) 
+        for (i = 0; i < length; i ++) 
         {
-            // loop is unrolled by factor of 4 here for efficiency
-            sum += pSrc[i + 0] * filterCoeffs[i + 0] + 
-                   pSrc[i + 1] * filterCoeffs[i + 1] + 
-                   pSrc[i + 2] * filterCoeffs[i + 2] + 
-                   pSrc[i + 3] * filterCoeffs[i + 3];
+            sum += pSrc[i] * filterCoeffs[i];
         }
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
         sum >>= resultDivFactor;
diff --git a/media/libsoundtouch/src/InterpolateCubic.h b/media/libsoundtouch/src/InterpolateCubic.h
index 7f84f280248b..9749bcd1ce40 100644
--- a/media/libsoundtouch/src/InterpolateCubic.h
+++ b/media/libsoundtouch/src/InterpolateCubic.h
@@ -56,6 +56,11 @@ protected:
 
 public:
     InterpolateCubic();
+
+    int getLatency() const
+    {
+        return 1;
+    }
 };
 
 }
diff --git a/media/libsoundtouch/src/InterpolateLinear.h b/media/libsoundtouch/src/InterpolateLinear.h
index 087dc2aecf51..ffc11bd80e97 100644
--- a/media/libsoundtouch/src/InterpolateLinear.h
+++ b/media/libsoundtouch/src/InterpolateLinear.h
@@ -60,6 +60,11 @@ public:
     /// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
     /// rate, larger faster rates.
     virtual void setRate(double newRate);
+
+    int getLatency() const
+    {
+        return 0;
+    }
 };
 
 
@@ -81,6 +86,11 @@ protected:
 
 public:
     InterpolateLinearFloat();
+
+    int getLatency() const
+    {
+        return 0;
+    }
 };
 
 }
diff --git a/media/libsoundtouch/src/InterpolateShannon.h b/media/libsoundtouch/src/InterpolateShannon.h
index 2a59a84f191b..794e755108fb 100644
--- a/media/libsoundtouch/src/InterpolateShannon.h
+++ b/media/libsoundtouch/src/InterpolateShannon.h
@@ -61,6 +61,11 @@ protected:
 
 public:
     InterpolateShannon();
+
+    int getLatency() const
+    {
+        return 3;
+    }
 };
 
 }
diff --git a/media/libsoundtouch/src/RateTransposer.cpp b/media/libsoundtouch/src/RateTransposer.cpp
index 24d8f36b34dc..30a91d433222 100644
--- a/media/libsoundtouch/src/RateTransposer.cpp
+++ b/media/libsoundtouch/src/RateTransposer.cpp
@@ -61,6 +61,7 @@ RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
     // Instantiates the anti-alias filter
     pAAFilter = new AAFilter(64);
     pTransposer = TransposerBase::newInstance();
+    clear();
 }
 
 
@@ -192,6 +193,10 @@ void RateTransposer::clear()
     outputBuffer.clear();
     midBuffer.clear();
     inputBuffer.clear();
+
+    // prefill buffer to avoid losing first samples at beginning of stream
+    int prefill = getLatency();
+    inputBuffer.addSilent(prefill);
 }
 
 
@@ -209,7 +214,8 @@ int RateTransposer::isEmpty() const
 /// Return approximate initial input-output latency
 int RateTransposer::getLatency() const
 {
-    return (bUseAAFilter) ? pAAFilter->getLength() : 0;
+    return pTransposer->getLatency() +
+        ((bUseAAFilter) ? (pAAFilter->getLength() / 2) : 0);
 }
 
 
diff --git a/media/libsoundtouch/src/RateTransposer.h b/media/libsoundtouch/src/RateTransposer.h
index 5f31d1a6dde2..45e79cf33598 100644
--- a/media/libsoundtouch/src/RateTransposer.h
+++ b/media/libsoundtouch/src/RateTransposer.h
@@ -83,6 +83,7 @@ public:
     virtual int transpose(FIFOSampleBuffer &dest, FIFOSampleBuffer &src);
     virtual void setRate(double newRate);
     virtual void setChannels(int channels);
+    virtual int getLatency() const = 0;
 
     // static factory function
     static TransposerBase *newInstance();
diff --git a/media/libsoundtouch/src/TDStretch.cpp b/media/libsoundtouch/src/TDStretch.cpp
index 4abb574d046e..c19c2df42687 100644
--- a/media/libsoundtouch/src/TDStretch.cpp
+++ b/media/libsoundtouch/src/TDStretch.cpp
@@ -1,4 +1,4 @@
-////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
 /// 
 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
 /// while maintaining the original pitch by using a time domain WSOLA-like 
@@ -54,6 +54,10 @@ using namespace soundtouch;
 
 #define max(x, y) (((x) > (y)) ? (x) : (y))
 
+#if defined(SOUNDTOUCH_USE_NEON) && defined(SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION)
+    // SIMD mode, allow shortcuts to avoid operations that aren't aligned to 16-byte boundary
+    #define ST_SIMD_AVOID_UNALIGNED
+#endif
 
 /*****************************************************************************
  *
@@ -315,9 +319,10 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
     {
         double corr;
         // Calculates correlation value for the mixing position corresponding to 'i'
-#ifdef _OPENMP
+#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
         // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
         // iterate the loop in sequential order
+        // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
         corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
 #else
         // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
@@ -832,21 +837,19 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
 
 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
 // version of the routine.
-void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
+void TDStretch::overlapMulti(short *poutput, const short *input) const
 {
-    SAMPLETYPE m1=(SAMPLETYPE)0;
-    SAMPLETYPE m2;
-    int i=0;
+    short m1;
+    int i = 0;
 
-    for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
+    for (m1 = 0; m1 < overlapLength; m1 ++)
     {
+        short m2 = (short)(overlapLength - m1);
         for (int c = 0; c < channels; c ++)
         {
             poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2)  / overlapLength;
             i++;
         }
-
-        m1++;
     }
 }
 
@@ -891,20 +894,20 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
     unsigned long lnorm;
     int i;
 
+    #ifdef ST_SIMD_AVOID_UNALIGNED
+        // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
+        if (((ulongptr)mixingPos) & 15) return -1e50;
+    #endif
+
     corr = lnorm = 0;
-    // Same routine for stereo and mono. For stereo, unroll loop for better
-    // efficiency and gives slightly better resolution against rounding. 
-    // For mono it same routine, just  unrolls loop by factor of 4
-    for (i = 0; i < channels * overlapLength; i += 4) 
+    // Same routine for stereo and mono
+    for (i = 0; i < channels * overlapLength; i += 2)
     {
         corr += (mixingPos[i] * compare[i] + 
-                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;  // notice: do intermediate division here to avoid integer overflow
-        corr += (mixingPos[i + 2] * compare[i + 2] + 
-                mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
+                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
         lnorm += (mixingPos[i] * mixingPos[i] + 
-                mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
-        lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + 
-                mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm;
+                  mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
+        // do intermediate scalings to avoid integer overflow
     }
 
     if (lnorm > maxnorm)
@@ -926,7 +929,41 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
 double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
 {
-  return calcCrossCorr(mixingPos, compare, norm);
+    long corr;
+    long lnorm;
+    int i;
+
+    // cancel first normalizer tap from previous round
+    lnorm = 0;
+    for (i = 1; i <= channels; i ++)
+    {
+        lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
+    }
+
+    corr = 0;
+    // Same routine for stereo and mono.
+    for (i = 0; i < channels * overlapLength; i += 2) 
+    {
+        corr += (mixingPos[i] * compare[i] + 
+                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
+    }
+
+    // update normalizer with last samples of this round
+    for (int j = 0; j < channels; j ++)
+    {
+        i --;
+        lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
+    }
+
+    norm += (double)lnorm;
+    if (norm > maxnorm)
+    {
+        maxnorm = (unsigned long)norm;
+    }
+
+    // Normalize result by dividing by sqrt(norm) - this step is easiest 
+    // done using floating point operation
+    return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
 }
 
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
@@ -1009,27 +1046,21 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
 /// Calculate cross-correlation
 double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
 {
-    double corr;
-    double norm;
+    float corr;
+    float norm;
     int i;
 
+    #ifdef ST_SIMD_AVOID_UNALIGNED
+        // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
+        if (((ulongptr)mixingPos) & 15) return -1e50;
+    #endif
+
     corr = norm = 0;
-    // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
-    // For mono it's same routine yet unrollsd by factor of 4.
-    for (i = 0; i < channels * overlapLength; i += 4) 
+    // Same routine for stereo and mono
+    for (i = 0; i < channels * overlapLength; i ++)
     {
-        corr += mixingPos[i] * compare[i] +
-                mixingPos[i + 1] * compare[i + 1];
-
-        norm += mixingPos[i] * mixingPos[i] + 
-                mixingPos[i + 1] * mixingPos[i + 1];
-
-        // unroll the loop for better CPU efficiency:
-        corr += mixingPos[i + 2] * compare[i + 2] +
-                mixingPos[i + 3] * compare[i + 3];
-
-        norm += mixingPos[i + 2] * mixingPos[i + 2] +
-                mixingPos[i + 3] * mixingPos[i + 3];
+        corr += mixingPos[i] * compare[i];
+        norm += mixingPos[i] * mixingPos[i];
     }
 
     anorm = norm;
@@ -1040,7 +1071,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
 double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
 {
-    double corr;
+    float corr;
     int i;
 
     corr = 0;
@@ -1051,14 +1082,10 @@ double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *c
         norm -= mixingPos[-i] * mixingPos[-i];
     }
 
-    // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
-    // For mono it's same routine yet unrollsd by factor of 4.
-    for (i = 0; i < channels * overlapLength; i += 4) 
+    // Same routine for stereo and mono
+    for (i = 0; i < channels * overlapLength; i ++)
     {
-        corr += mixingPos[i] * compare[i] +
-                mixingPos[i + 1] * compare[i + 1] +
-                mixingPos[i + 2] * compare[i + 2] +
-                mixingPos[i + 3] * compare[i + 3];
+        corr += mixingPos[i] * compare[i];
     }
 
     // update normalizer with last samples of this round