From 24db8f79b37ac10ae9abd0ad924f63ca0822813d Mon Sep 17 00:00:00 2001
From: Karl Tomlinson
Date: Tue, 16 Jan 2024 00:09:30 +0000
Subject: [PATCH] Bug 1872519 pass planar reverse stream data to
 AudioProcessingTrack::NotifyOutputData() r=chunmin

to remove unnecessary deinterleaving. This will facilitate passing the
output for a secondary output device, without interleaving.

The AudioChunk is down-mixed directly into the AudioProcessing's input
buffer, rather than using an AudioPacketizer, to skip another one or two
copies.

processedFrameCount accounting in TestAudioCallbackDriver.SlowStart is
adjusted to ignore frames processed while waiting for the fallback driver
to stop [1] and to continue counting frames while the driver shuts down.

[1] https://searchfox.org/mozilla-central/rev/6856d0cab9e37dd9eb305f174ff71f0a95b31f82/dom/media/GraphDriver.cpp#873-882

Depends on D198236

Differential Revision: https://phabricator.services.mozilla.com/D198237
---
 dom/media/GraphDriver.cpp                   |  12 --
 dom/media/GraphDriver.h                     |   4 -
 dom/media/MediaTrackGraph.cpp               |  11 +-
 dom/media/MediaTrackGraphImpl.h             |   3 +-
 dom/media/gtest/TestAudioCallbackDriver.cpp |  27 +++--
 dom/media/webrtc/MediaEngineWebRTCAudio.cpp | 121 ++++++++------------
 dom/media/webrtc/MediaEngineWebRTCAudio.h   |  24 ++--
 7 files changed, 83 insertions(+), 119 deletions(-)

diff --git a/dom/media/GraphDriver.cpp b/dom/media/GraphDriver.cpp
index d3c847a568ba..335818bbdb73 100644
--- a/dom/media/GraphDriver.cpp
+++ b/dom/media/GraphDriver.cpp
@@ -334,10 +334,6 @@ class AudioCallbackDriver::FallbackWrapper : public GraphInterface {
   bool OnThread() { return mFallbackDriver->OnThread(); }
 
   /* GraphInterface methods */
-  void NotifyOutputData(AudioDataValue* aBuffer, size_t aFrames,
-                        TrackRate aRate, uint32_t aChannels) override {
-    MOZ_CRASH("Unexpected NotifyOutputData from fallback SystemClockDriver");
-  }
   void NotifyInputStopped() override {
     MOZ_CRASH("Unexpected NotifyInputStopped from fallback SystemClockDriver");
   }
@@ -961,14 +957,6 @@ long AudioCallbackDriver::DataCallback(const AudioDataValue* aInputBuffer,
   NaNToZeroInPlace(aOutputBuffer, aFrames * mOutputChannelCount);
 #endif
 
-  // Callback any observers for the AEC speaker data. Note that one
-  // (maybe) of these will be full-duplex, the others will get their input
-  // data off separate cubeb callbacks. Take care with how stuff is
-  // removed/added to this list and TSAN issues, but input and output will
-  // use separate callback methods.
-  Graph()->NotifyOutputData(aOutputBuffer, static_cast<size_t>(aFrames),
-                            mSampleRate, mOutputChannelCount);
-
 #ifdef XP_MACOSX
   // This only happens when the output is on a macbookpro's external speaker,
   // that are stereo, but let's just be safe.
diff --git a/dom/media/GraphDriver.h b/dom/media/GraphDriver.h
index f871b97272d2..150fee0b3d8a 100644
--- a/dom/media/GraphDriver.h
+++ b/dom/media/GraphDriver.h
@@ -177,10 +177,6 @@ struct GraphInterface : public nsISupports {
     }
   };
 
-  /* Called on the graph thread when there is new output data for listeners.
-   * This is the mixed audio output of this MediaTrackGraph. */
-  virtual void NotifyOutputData(AudioDataValue* aBuffer, size_t aFrames,
-                                TrackRate aRate, uint32_t aChannels) = 0;
   /* Called on the graph thread after an AudioCallbackDriver with an input
    * stream has stopped. */
   virtual void NotifyInputStopped() = 0;
diff --git a/dom/media/MediaTrackGraph.cpp b/dom/media/MediaTrackGraph.cpp
index eb5004d4fe95..97c89df3c49a 100644
--- a/dom/media/MediaTrackGraph.cpp
+++ b/dom/media/MediaTrackGraph.cpp
@@ -897,9 +897,7 @@ void MediaTrackGraphImpl::CloseAudioInput(DeviceInputTrack* aTrack) {
 }
 
 // All AudioInput listeners get the same speaker data (at least for now).
-void MediaTrackGraphImpl::NotifyOutputData(AudioDataValue* aBuffer,
-                                           size_t aFrames, TrackRate aRate,
-                                           uint32_t aChannels) {
+void MediaTrackGraphImpl::NotifyOutputData(const AudioChunk& aChunk) {
   if (!mDeviceInputTrackManagerGraphThread.GetNativeInputTrack()) {
     return;
   }
@@ -907,7 +905,7 @@ void MediaTrackGraphImpl::NotifyOutputData(AudioDataValue* aBuffer,
 #if defined(MOZ_WEBRTC)
   for (const auto& track : mTracks) {
     if (const auto& t = track->AsAudioProcessingTrack()) {
-      t->NotifyOutputData(this, aBuffer, aFrames, aRate, aChannels);
+      t->NotifyOutputData(this, aChunk);
     }
   }
 #endif
@@ -1481,6 +1479,11 @@ void MediaTrackGraphImpl::Process(MixerCallbackReceiver* aMixerReceiver) {
     }
     AudioChunk* outputChunk = mMixer.MixedChunk();
     if (!outputDeviceEntry.mReceiver) {  // primary output
+      // Callback any observers for the AEC speaker data. Note that one
+      // (maybe) of these will be full-duplex, the others will get their input
+      // data off separate cubeb callbacks.
+      NotifyOutputData(*outputChunk);
+
       aMixerReceiver->MixerCallback(outputChunk, mSampleRate);
     } else {
       outputDeviceEntry.mReceiver->EnqueueAudio(*outputChunk);
diff --git a/dom/media/MediaTrackGraphImpl.h b/dom/media/MediaTrackGraphImpl.h
index a0af7945dd2d..23327ad52ba8 100644
--- a/dom/media/MediaTrackGraphImpl.h
+++ b/dom/media/MediaTrackGraphImpl.h
@@ -480,8 +480,7 @@ class MediaTrackGraphImpl : public MediaTrackGraph,
 
   /* Called on the graph thread when there is new output data for listeners.
    * This is the mixed audio output of this MediaTrackGraph. */
-  void NotifyOutputData(AudioDataValue* aBuffer, size_t aFrames,
-                        TrackRate aRate, uint32_t aChannels) override;
+  void NotifyOutputData(const AudioChunk& aChunk);
   /* Called on the graph thread after an AudioCallbackDriver with an input
    * stream has stopped. */
   void NotifyInputStopped() override;
diff --git a/dom/media/gtest/TestAudioCallbackDriver.cpp b/dom/media/gtest/TestAudioCallbackDriver.cpp
index e7fd693672d4..1cbb3e22269a 100644
--- a/dom/media/gtest/TestAudioCallbackDriver.cpp
+++ b/dom/media/gtest/TestAudioCallbackDriver.cpp
@@ -26,8 +26,6 @@ class MockGraphInterface : public GraphInterface {
   NS_DECL_THREADSAFE_ISUPPORTS
   explicit MockGraphInterface(TrackRate aSampleRate)
       : mSampleRate(aSampleRate) {}
-  MOCK_METHOD4(NotifyOutputData,
-               void(AudioDataValue*, size_t, TrackRate, uint32_t));
   MOCK_METHOD0(NotifyInputStopped, void());
   MOCK_METHOD5(NotifyInputData, void(const AudioDataValue*, size_t, TrackRate,
                                      uint32_t, uint32_t));
@@ -107,8 +105,6 @@ MOZ_CAN_RUN_SCRIPT_FOR_DEFINITION {
   RefPtr<AudioCallbackDriver> driver;
   auto graph = MakeRefPtr<NiceMock<MockGraphInterface>>(rate);
   EXPECT_CALL(*graph, NotifyInputStopped).Times(0);
-  ON_CALL(*graph, NotifyOutputData)
-      .WillByDefault([&](AudioDataValue*, size_t, TrackRate, uint32_t) {});
 
   driver = MakeRefPtr<AudioCallbackDriver>(graph, nullptr, rate, 2, 0, nullptr,
                                            nullptr, AudioInputType::Unknown);
@@ -144,7 +140,6 @@ void TestSlowStart(const TrackRate aRate) MOZ_CAN_RUN_SCRIPT_FOR_DEFINITION {
   Maybe<GraphTime> audioStart;
   Maybe<uint32_t> alreadyBuffered;
   int64_t inputFrameCount = 0;
-  int64_t outputFrameCount = 0;
   int64_t processedFrameCount = 0;
   ON_CALL(*graph, NotifyInputData)
       .WillByDefault([&](const AudioDataValue*, size_t aFrames, TrackRate,
@@ -152,6 +147,9 @@ void TestSlowStart(const TrackRate aRate) MOZ_CAN_RUN_SCRIPT_FOR_DEFINITION {
         if (!audioStart) {
          audioStart = Some(graph->StateComputedTime());
          alreadyBuffered = Some(aAlreadyBuffered);
+          // Reset processedFrameCount to ignore frames processed while waiting
+          // for the fallback driver to stop.
+          processedFrameCount = 0;
        }
        EXPECT_NEAR(inputFrameCount,
                    static_cast<int64_t>(graph->StateComputedTime() -
@@ -163,9 +161,6 @@ void TestSlowStart(const TrackRate aRate) MOZ_CAN_RUN_SCRIPT_FOR_DEFINITION {
                 << ", alreadyBuffered=" << *alreadyBuffered;
             inputFrameCount += aFrames;
           });
-  ON_CALL(*graph, NotifyOutputData)
-      .WillByDefault([&](AudioDataValue*, size_t aFrames, TrackRate aRate,
-                         uint32_t) { outputFrameCount += aFrames; });
   driver = MakeRefPtr<AudioCallbackDriver>(graph, nullptr, aRate, 2, 2,
                                            nullptr, (void*)1,
                                            AudioInputType::Voice);
@@ -198,18 +193,22 @@ void TestSlowStart(const TrackRate aRate) MOZ_CAN_RUN_SCRIPT_FOR_DEFINITION {
         << "Fallback driver iteration <1s (sanity)";
     return graph->IterationCount() >= fallbackIterations;
   });
+
+  MediaEventListener processedListener = stream->FramesProcessedEvent().Connect(
+      AbstractThread::GetCurrent(),
+      [&](uint32_t aFrames) { processedFrameCount += aFrames; });
   stream->Thaw();
 
-  // Wait for at least 100ms of audio data.
-  WaitUntil(stream->FramesProcessedEvent(), [&](uint32_t aFrames) {
-    processedFrameCount += aFrames;
-    return processedFrameCount >= aRate / 10;
-  });
+  SpinEventLoopUntil(
+      "processed at least 100ms of audio data from stream callback"_ns, [&] {
+        return inputFrameCount != 0 && processedFrameCount >= aRate / 10;
+      });
 
   // This will block untill all events have been executed.
   MOZ_KnownLive(driver)->Shutdown();
+  processedListener.Disconnect();
 
-  EXPECT_EQ(inputFrameCount, outputFrameCount);
+  EXPECT_EQ(inputFrameCount, processedFrameCount);
   EXPECT_NEAR(graph->StateComputedTime() - *audioStart,
               inputFrameCount + *alreadyBuffered, WEBAUDIO_BLOCK_SIZE)
       << "Graph progresses while audio driver runs. stateComputedTime="
diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
index 74d0ba4abafd..ee45bb03174f 100644
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -681,89 +681,65 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom,
 }
 
 void AudioInputProcessing::ProcessOutputData(MediaTrackGraph* aGraph,
-                                             AudioDataValue* aBuffer,
-                                             size_t aFrames, TrackRate aRate,
-                                             uint32_t aChannels) {
+                                             const AudioChunk& aChunk) {
+  MOZ_ASSERT(aChunk.ChannelCount() > 0);
   aGraph->AssertOnGraphThread();
 
   if (!mEnabled || PassThrough(aGraph)) {
     return;
   }
 
-  if (!mPacketizerOutput ||
-      mPacketizerOutput->mPacketSize != GetPacketSize(aRate) ||
-      mPacketizerOutput->mChannels != aChannels) {
+  TrackRate sampleRate = aGraph->GraphRate();
+  uint32_t framesPerPacket = GetPacketSize(sampleRate);  // in frames
+  // Downmix from aChannels to MAX_CHANNELS if needed.
+  uint32_t channelCount =
+      std::min<uint32_t>(aChunk.ChannelCount(), MAX_CHANNELS);
+  if (channelCount != mOutputBufferChannelCount ||
+      channelCount * framesPerPacket != mOutputBuffer.Length()) {
+    mOutputBuffer.SetLength(channelCount * framesPerPacket);
+    mOutputBufferChannelCount = channelCount;
     // It's ok to drop the audio still in the packetizer here: if this changes,
     // we changed devices or something.
-    mPacketizerOutput = Nothing();
-    mPacketizerOutput.emplace(GetPacketSize(aRate), aChannels);
+    mOutputBufferFrameCount = 0;
   }
 
-  mPacketizerOutput->Input(aBuffer, aFrames);
+  TrackTime chunkOffset = 0;
+  AutoTArray<float*, MAX_CHANNELS> channelPtrs;
+  channelPtrs.SetLength(channelCount);
+  do {
+    MOZ_ASSERT(mOutputBufferFrameCount < framesPerPacket);
+    uint32_t packetRemainder = framesPerPacket - mOutputBufferFrameCount;
+    mSubChunk = aChunk;
+    mSubChunk.SliceTo(
+        chunkOffset, std::min(chunkOffset + packetRemainder, aChunk.mDuration));
+    MOZ_ASSERT(mSubChunk.mDuration <= packetRemainder);
 
-  while (mPacketizerOutput->PacketsAvailable()) {
-    uint32_t samplesPerPacket =
-        mPacketizerOutput->mPacketSize * mPacketizerOutput->mChannels;
-    if (mOutputBuffer.Length() < samplesPerPacket) {
-      mOutputBuffer.SetLength(samplesPerPacket);
+    for (uint32_t channel = 0; channel < channelCount; channel++) {
+      channelPtrs[channel] =
+          &mOutputBuffer[channel * framesPerPacket + mOutputBufferFrameCount];
     }
-    if (mDeinterleavedBuffer.Length() < samplesPerPacket) {
-      mDeinterleavedBuffer.SetLength(samplesPerPacket);
+    mSubChunk.DownMixTo(channelPtrs);
+
+    chunkOffset += mSubChunk.mDuration;
+    MOZ_ASSERT(chunkOffset <= aChunk.mDuration);
+    mOutputBufferFrameCount += mSubChunk.mDuration;
+    MOZ_ASSERT(mOutputBufferFrameCount <= framesPerPacket);
+
+    if (mOutputBufferFrameCount == framesPerPacket) {
+      // Have a complete packet. Analyze it.
+      for (uint32_t channel = 0; channel < channelCount; channel++) {
+        channelPtrs[channel] = &mOutputBuffer[channel * framesPerPacket];
+      }
+      StreamConfig reverseConfig(sampleRate, channelCount);
+      DebugOnly<int> err = mAudioProcessing->AnalyzeReverseStream(
+          channelPtrs.Elements(), reverseConfig);
+      MOZ_ASSERT(!err, "Could not process the reverse stream.");
+
+      mOutputBufferFrameCount = 0;
     }
-    float* packet = mOutputBuffer.Data();
-    mPacketizerOutput->Output(packet);
+  } while (chunkOffset < aChunk.mDuration);
 
-    AutoTArray<float*, MAX_CHANNELS> deinterleavedPacketDataChannelPointers;
-    float* interleavedFarend = nullptr;
-    uint32_t channelCountFarend = 0;
-    uint32_t framesPerPacketFarend = 0;
-
-    // Downmix from aChannels to MAX_CHANNELS if needed. We always have
-    // floats here, the packetized performed the conversion.
-    if (aChannels > MAX_CHANNELS) {
-      AudioConverter converter(
-          AudioConfig(aChannels, 0, AudioConfig::FORMAT_FLT),
-          AudioConfig(MAX_CHANNELS, 0, AudioConfig::FORMAT_FLT));
-      framesPerPacketFarend = mPacketizerOutput->mPacketSize;
-      framesPerPacketFarend =
-          converter.Process(mInputDownmixBuffer, packet, framesPerPacketFarend);
-      interleavedFarend = mInputDownmixBuffer.Data();
-      channelCountFarend = MAX_CHANNELS;
-      deinterleavedPacketDataChannelPointers.SetLength(MAX_CHANNELS);
-    } else {
-      interleavedFarend = packet;
-      channelCountFarend = aChannels;
-      framesPerPacketFarend = mPacketizerOutput->mPacketSize;
-      deinterleavedPacketDataChannelPointers.SetLength(aChannels);
-    }
-
-    MOZ_ASSERT(interleavedFarend &&
-               (channelCountFarend == 1 || channelCountFarend == 2) &&
-               framesPerPacketFarend);
-
-    if (mInputBuffer.Length() < framesPerPacketFarend * channelCountFarend) {
-      mInputBuffer.SetLength(framesPerPacketFarend * channelCountFarend);
-    }
-
-    size_t offset = 0;
-    for (size_t i = 0; i < deinterleavedPacketDataChannelPointers.Length();
-         ++i) {
-      deinterleavedPacketDataChannelPointers[i] = mInputBuffer.Data() + offset;
-      offset += framesPerPacketFarend;
-    }
-
-    // Deinterleave, prepare a channel pointers array, with enough storage for
-    // the frames.
-    DeinterleaveAndConvertBuffer(
-        interleavedFarend, framesPerPacketFarend, channelCountFarend,
-        deinterleavedPacketDataChannelPointers.Elements());
-
-    StreamConfig reverseConfig(aRate, channelCountFarend);
-    DebugOnly<int> err = mAudioProcessing->AnalyzeReverseStream(
-        deinterleavedPacketDataChannelPointers.Elements(), reverseConfig);
-
-    MOZ_ASSERT(!err, "Could not process the reverse stream.");
-  }
+  mSubChunk.SetNull(0);
 }
 
 // Only called if we're not in passthrough mode
@@ -1162,14 +1138,11 @@ void AudioProcessingTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
 }
 
 void AudioProcessingTrack::NotifyOutputData(MediaTrackGraph* aGraph,
-                                            AudioDataValue* aBuffer,
-                                            size_t aFrames, TrackRate aRate,
-                                            uint32_t aChannels) {
+                                            const AudioChunk& aChunk) {
   MOZ_ASSERT(mGraph == aGraph, "Cannot feed audio output to another graph");
   AssertOnGraphThread();
   if (mInputProcessing) {
-    mInputProcessing->ProcessOutputData(aGraph, aBuffer, aFrames, aRate,
-                                        aChannels);
+    mInputProcessing->ProcessOutputData(aGraph, aChunk);
   }
 }
 
diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.h b/dom/media/webrtc/MediaEngineWebRTCAudio.h
index 2826eda2f887..e71b5ef826ae 100644
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.h
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.h
@@ -116,8 +116,7 @@ class AudioInputProcessing : public AudioDataListener {
   void Process(MediaTrackGraph* aGraph, GraphTime aFrom, GraphTime aTo,
                AudioSegment* aInput, AudioSegment* aOutput);
 
-  void ProcessOutputData(MediaTrackGraph* aGraph, AudioDataValue* aBuffer,
-                         size_t aFrames, TrackRate aRate, uint32_t aChannels);
+  void ProcessOutputData(MediaTrackGraph* aGraph, const AudioChunk& aChunk);
   bool IsVoiceInput(MediaTrackGraph* aGraph) const override {
     // If we're passing data directly without AEC or any other process, this
     // means that all voice-processing has been disabled intentionaly. In this
@@ -179,9 +178,6 @@ class AudioInputProcessing : public AudioDataListener {
   // Packetizer to be able to feed 10ms packets to the input side of
   // mAudioProcessing. Not used if the processing is bypassed.
   Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
-  // Packetizer to be able to feed 10ms packets to the output side of
-  // mAudioProcessing. Not used if the processing is bypassed.
-  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerOutput;
   // The number of channels asked for by content, after clamping to the range of
   // legal channel count for this particular device.
   uint32_t mRequestedInputChannelCount;
@@ -189,9 +185,15 @@ class AudioInputProcessing : public AudioDataListener {
   // because of prefs or constraints. This allows simply copying the audio into
   // the MTG, skipping resampling and the whole webrtc.org code.
   bool mSkipProcessing;
-  // Stores the mixed audio output for the reverse-stream of the AEC (the
-  // speaker data).
+  // Buffer for up to one 10ms packet of planar mixed audio output for the
+  // reverse-stream (speaker data) of mAudioProcessing AEC.
+  // Length is packet size * channel count, regardless of how many frames are
+  // buffered. Not used if the processing is bypassed.
   AlignedFloatBuffer mOutputBuffer;
+  // Number of channels into which mOutputBuffer is divided.
+  uint32_t mOutputBufferChannelCount = 0;
+  // Number of frames buffered in mOutputBuffer for the reverse stream.
+  uint32_t mOutputBufferFrameCount = 0;
   // Stores the input audio, to be processed by the APM.
   AlignedFloatBuffer mInputBuffer;
   // Stores the deinterleaved microphone audio
@@ -209,6 +211,11 @@ class AudioInputProcessing : public AudioDataListener {
   // When processing is enabled, the number of packets received by this
   // instance, to implement periodic logging.
   uint64_t mPacketCount;
+  // Temporary descriptor for a slice of an AudioChunk parameter passed to
+  // ProcessOutputData(). This is a member rather than on the stack so that
+  // any memory allocated for its mChannelData pointer array is not
+  // reallocated on each iteration.
+  AudioChunk mSubChunk;
   // A storage holding the interleaved audio data converted the AudioSegment.
   // This will be used as an input parameter for PacketizeAndProcess. This
   // should be removed once bug 1729041 is done.
@@ -246,8 +253,7 @@ class AudioProcessingTrack : public DeviceInputConsumerTrack {
   }
   // Pass the graph's mixed audio output to mInputProcessing for processing as
   // the reverse stream.
-  void NotifyOutputData(MediaTrackGraph* aGraph, AudioDataValue* aBuffer,
-                        size_t aFrames, TrackRate aRate, uint32_t aChannels);
+  void NotifyOutputData(MediaTrackGraph* aGraph, const AudioChunk& aChunk);
 
   // Any thread
   AudioProcessingTrack* AsAudioProcessingTrack() override { return this; }