forked from mirrors/gecko-dev
		
	Bug 1442178 - Repair broken socket polling wakeup mechanism after a network change to prevent long load hangs, r=dragana
				
					
				
			This commit is contained in:
		
							parent
							
								
									fbc0928677
								
							
						
					
					
						commit
						dab22ca3b9
					
				
					 5 changed files with 158 additions and 19 deletions
				
			
		|  | @ -1770,6 +1770,7 @@ pref("network.http.focused_window_transaction_ratio", "0.9"); | |||
| // Whether or not we give more priority to active tab.
 | ||||
| // Note that this requires restart for changes to take effect.
 | ||||
| pref("network.http.active_tab_priority", true); | ||||
| // </http>
 | ||||
| 
 | ||||
| // default values for FTP
 | ||||
| // in a DSCP environment this should be 40 (0x28, or AF11), per RFC-4594,
 | ||||
|  | @ -1785,7 +1786,12 @@ pref("network.sts.max_time_for_events_between_two_polls", 100); | |||
| // During shutdown we limit PR_Close calls. If time exceeds this pref (in ms)
 | ||||
| // let sockets just leak.
 | ||||
| pref("network.sts.max_time_for_pr_close_during_shutdown", 5000); | ||||
| // </http>
 | ||||
| 
 | ||||
| // When the polling socket pair we use to wake poll() up on demand doesn't
 | ||||
| // get signalled (is not readable) within this timeout, we try to repair it.
 | ||||
| // This timeout can be disabled by setting this pref to 0.
 | ||||
| // The value is expected in seconds.
 | ||||
| pref("network.sts.pollable_event_timeout", 6); | ||||
| 
 | ||||
| // 2147483647 == PR_INT32_MAX == ~2 GB
 | ||||
| pref("network.websocket.max-message-size", 2147483647); | ||||
|  |  | |||
|  | @ -140,6 +140,8 @@ PollableEvent::PollableEvent() | |||
|   : mWriteFD(nullptr) | ||||
|   , mReadFD(nullptr) | ||||
|   , mSignaled(false) | ||||
|   , mWriteFailed(false) | ||||
|   , mSignalTimestampAdjusted(false) | ||||
| { | ||||
|   MOZ_COUNT_CTOR(PollableEvent); | ||||
|   MOZ_ASSERT(OnSocketThread(), "not on socket thread"); | ||||
|  | @ -218,6 +220,7 @@ PollableEvent::PollableEvent() | |||
|     // prime the system to deal with races invovled in [dc]tor cycle
 | ||||
|     SOCKET_LOG(("PollableEvent() ctor ok\n")); | ||||
|     mSignaled = true; | ||||
|     MarkFirstSignalTimestamp(); | ||||
|     PR_Write(mWriteFD, "I", 1); | ||||
|   } | ||||
| } | ||||
|  | @ -274,13 +277,20 @@ PollableEvent::Signal() | |||
|   } | ||||
| #endif | ||||
| 
 | ||||
|   mSignaled = true; | ||||
|   if (!mSignaled) { | ||||
|     mSignaled = true; | ||||
|     MarkFirstSignalTimestamp(); | ||||
|   } | ||||
| 
 | ||||
|   int32_t status = PR_Write(mWriteFD, "M", 1); | ||||
|   SOCKET_LOG(("PollableEvent::Signal PR_Write %d\n", status)); | ||||
|   if (status != 1) { | ||||
|     NS_WARNING("PollableEvent::Signal Failed\n"); | ||||
|     SOCKET_LOG(("PollableEvent::Signal Failed\n")); | ||||
|     mSignaled = false; | ||||
|     mWriteFailed = true; | ||||
|   } else { | ||||
|     mWriteFailed = false; | ||||
|   } | ||||
|   return (status == 1); | ||||
| } | ||||
|  | @ -292,11 +302,21 @@ PollableEvent::Clear() | |||
|   MOZ_ASSERT(OnSocketThread(), "not on socket thread"); | ||||
| 
 | ||||
|   SOCKET_LOG(("PollableEvent::Clear\n")); | ||||
| 
 | ||||
|   if (!mFirstSignalAfterClear.IsNull()) { | ||||
|     SOCKET_LOG(("PollableEvent::Clear time to signal %ums", | ||||
|                 (uint32_t)(TimeStamp::NowLoRes() - mFirstSignalAfterClear).ToMilliseconds())); | ||||
|   } | ||||
| 
 | ||||
|   mFirstSignalAfterClear = TimeStamp(); | ||||
|   mSignalTimestampAdjusted = false; | ||||
|   mSignaled = false; | ||||
| 
 | ||||
|   if (!mReadFD) { | ||||
|     SOCKET_LOG(("PollableEvent::Clear mReadFD is null\n")); | ||||
|     return false; | ||||
|   } | ||||
| 
 | ||||
|   char buf[2048]; | ||||
|   int32_t status; | ||||
| #ifdef XP_WIN | ||||
|  | @ -304,7 +324,7 @@ PollableEvent::Clear() | |||
|   // do not have any deadlock read from the socket as much as we can.
 | ||||
|   while (true) { | ||||
|     status = PR_Read(mReadFD, buf, 2048); | ||||
|     SOCKET_LOG(("PollableEvent::Signal PR_Read %d\n", status)); | ||||
|     SOCKET_LOG(("PollableEvent::Clear PR_Read %d\n", status)); | ||||
|     if (status == 0) { | ||||
|       SOCKET_LOG(("PollableEvent::Clear EOF!\n")); | ||||
|       return false; | ||||
|  | @ -321,7 +341,7 @@ PollableEvent::Clear() | |||
|   } | ||||
| #else | ||||
|   status = PR_Read(mReadFD, buf, 2048); | ||||
|   SOCKET_LOG(("PollableEvent::Signal PR_Read %d\n", status)); | ||||
|   SOCKET_LOG(("PollableEvent::Clear PR_Read %d\n", status)); | ||||
| 
 | ||||
|   if (status == 1) { | ||||
|     return true; | ||||
|  | @ -345,5 +365,47 @@ PollableEvent::Clear() | |||
| #endif //XP_WIN
 | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| void | ||||
| PollableEvent::MarkFirstSignalTimestamp() | ||||
| { | ||||
|   if (mFirstSignalAfterClear.IsNull()) { | ||||
|     SOCKET_LOG(("PollableEvent::MarkFirstSignalTimestamp")); | ||||
|     mFirstSignalAfterClear = TimeStamp::NowLoRes(); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| void | ||||
| PollableEvent::AdjustFirstSignalTimestamp() | ||||
| { | ||||
|   if (!mSignalTimestampAdjusted && !mFirstSignalAfterClear.IsNull()) { | ||||
|     SOCKET_LOG(("PollableEvent::AdjustFirstSignalTimestamp")); | ||||
|     mFirstSignalAfterClear = TimeStamp::NowLoRes(); | ||||
|     mSignalTimestampAdjusted = true; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| bool | ||||
| PollableEvent::IsSignallingAlive(TimeDuration const& timeout) | ||||
| { | ||||
|   if (mWriteFailed) { | ||||
|     return false; | ||||
|   } | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
|   // The timeout would be just a disturbance in a debug build.
 | ||||
|   return true; | ||||
| #else | ||||
|   if (!mSignaled || mFirstSignalAfterClear.IsNull() || timeout == TimeDuration()) { | ||||
|     return true; | ||||
|   } | ||||
| 
 | ||||
|   TimeDuration delay = (TimeStamp::NowLoRes() - mFirstSignalAfterClear); | ||||
|   bool timedOut = delay > timeout; | ||||
| 
 | ||||
|   return !timedOut; | ||||
| #endif // DEBUG
 | ||||
| } | ||||
| 
 | ||||
| } // namespace net
 | ||||
| } // namespace mozilla
 | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ | |||
| #define PollableEvent_h__ | ||||
| 
 | ||||
| #include "mozilla/Mutex.h" | ||||
| #include "mozilla/TimeStamp.h" | ||||
| 
 | ||||
| namespace mozilla { | ||||
| namespace net { | ||||
|  | @ -21,15 +22,42 @@ public: | |||
| 
 | ||||
|   // Signal/Clear return false only if they fail
 | ||||
|   bool Signal(); | ||||
|   // This is called only when we get non-null out_flags for the socket pair
 | ||||
|   bool Clear(); | ||||
|   bool Valid() { return mWriteFD && mReadFD; } | ||||
| 
 | ||||
|   // We want to detect if writing to one of the socket pair sockets takes
 | ||||
|   // too long to be received by the other socket from the pair.
 | ||||
|   // Hence, we remember the timestamp of the earliest write by a call to
 | ||||
|   // MarkFirstSignalTimestamp() from Signal().  After waking up from poll()
 | ||||
|   // we check how long it took get the 'readable' signal on the socket pair.
 | ||||
|   void MarkFirstSignalTimestamp(); | ||||
|   // Called right before we enter poll() to exclude any possible delay between
 | ||||
|   // the earlist call to Signal() and entering poll() caused by processing
 | ||||
|   // of events dispatched to the socket transport thread.
 | ||||
|   void AdjustFirstSignalTimestamp(); | ||||
|   // This returns false on following conditions:
 | ||||
|   // - PR_Write has failed
 | ||||
|   // - no out_flags were signalled on the socket pair for too long after
 | ||||
|   //   the earliest Signal()
 | ||||
|   bool IsSignallingAlive(TimeDuration const& timeout); | ||||
| 
 | ||||
|   PRFileDesc *PollableFD() { return mReadFD; } | ||||
| 
 | ||||
| private: | ||||
|   PRFileDesc *mWriteFD; | ||||
|   PRFileDesc *mReadFD; | ||||
|   bool        mSignaled; | ||||
|   // true when PR_Write to the socket pair has failed (status < 1)
 | ||||
|   bool        mWriteFailed; | ||||
|   // Set true after AdjustFirstSignalTimestamp() was called
 | ||||
|   // Set false after Clear() was called
 | ||||
|   // Ensures shifting the timestamp before entering poll() only once
 | ||||
|   // between Clear()'ings.
 | ||||
|   bool        mSignalTimestampAdjusted; | ||||
|   // Timestamp of the first call to Signal() (or time we enter poll())
 | ||||
|   // that happened after the last Clear() call
 | ||||
|   TimeStamp   mFirstSignalAfterClear; | ||||
| }; | ||||
| 
 | ||||
| } // namespace net
 | ||||
|  |  | |||
|  | @ -53,6 +53,7 @@ static Atomic<PRThread*, Relaxed> gSocketThread; | |||
| #define MAX_TIME_BETWEEN_TWO_POLLS "network.sts.max_time_for_events_between_two_polls" | ||||
| #define TELEMETRY_PREF "toolkit.telemetry.enabled" | ||||
| #define MAX_TIME_FOR_PR_CLOSE_DURING_SHUTDOWN "network.sts.max_time_for_pr_close_during_shutdown" | ||||
| #define POLLABLE_EVENT_TIMEOUT "network.sts.pollable_event_timeout" | ||||
| 
 | ||||
| #define REPAIR_POLLABLE_EVENT_TIME 10 | ||||
| 
 | ||||
|  | @ -135,6 +136,7 @@ nsSocketTransportService::nsSocketTransportService() | |||
|     , mKeepaliveRetryIntervalS(1) | ||||
|     , mKeepaliveProbeCount(kDefaultTCPKeepCount) | ||||
|     , mKeepaliveEnabledPref(false) | ||||
|     , mPollableEventTimeout(TimeDuration::FromSeconds(6)) | ||||
|     , mServingPendingQueue(false) | ||||
|     , mMaxTimePerPollIter(100) | ||||
|     , mTelemetryEnabledPref(false) | ||||
|  | @ -606,6 +608,7 @@ nsSocketTransportService::Init() | |||
|         tmpPrefService->AddObserver(MAX_TIME_BETWEEN_TWO_POLLS, this, false); | ||||
|         tmpPrefService->AddObserver(TELEMETRY_PREF, this, false); | ||||
|         tmpPrefService->AddObserver(MAX_TIME_FOR_PR_CLOSE_DURING_SHUTDOWN, this, false); | ||||
|         tmpPrefService->AddObserver(POLLABLE_EVENT_TIMEOUT, this, false); | ||||
|     } | ||||
|     UpdatePrefs(); | ||||
| 
 | ||||
|  | @ -1159,6 +1162,20 @@ nsSocketTransportService::DoPollIteration(TimeDuration *pollDuration) | |||
|             MoveToPollList(&mIdleList[i]); | ||||
|     } | ||||
| 
 | ||||
|     { | ||||
|         MutexAutoLock lock(mLock); | ||||
|         if (mPollableEvent) { | ||||
|             // we want to make sure the timeout is measured from the time
 | ||||
|             // we enter poll().  This method resets the timestamp to 'now',
 | ||||
|             // if we were first signalled between leaving poll() and here.
 | ||||
|             // If we didn't do this and processing events took longer than
 | ||||
|             // the allowed signal timeout, we would detect it as a
 | ||||
|             // false-positive.  AdjustFirstSignalTimestamp is then a no-op
 | ||||
|             // until mPollableEvent->Clear() is called.
 | ||||
|             mPollableEvent->AdjustFirstSignalTimestamp(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     SOCKET_LOG(("  calling PR_Poll [active=%u idle=%u]\n", mActiveCount, mIdleCount)); | ||||
| 
 | ||||
| #if defined(XP_WIN) | ||||
|  | @ -1230,29 +1247,26 @@ nsSocketTransportService::DoPollIteration(TimeDuration *pollDuration) | |||
|                 DetachSocket(mActiveList, &mActiveList[i]); | ||||
|         } | ||||
| 
 | ||||
|         if (n != 0 && (mPollList[0].out_flags & (PR_POLL_READ | PR_POLL_EXCEPT))) { | ||||
|         { | ||||
|             MutexAutoLock lock(mLock); | ||||
| 
 | ||||
|             // acknowledge pollable event (should not block)
 | ||||
|             if (mPollableEvent && | ||||
|                 ((mPollList[0].out_flags & PR_POLL_EXCEPT) || | ||||
|                  !mPollableEvent->Clear())) { | ||||
|             if (n != 0 && | ||||
|                 (mPollList[0].out_flags & (PR_POLL_READ | PR_POLL_EXCEPT)) && | ||||
|                 mPollableEvent && | ||||
|                 ((mPollList[0].out_flags & PR_POLL_EXCEPT) || !mPollableEvent->Clear())) { | ||||
|                 // On Windows, the TCP loopback connection in the
 | ||||
|                 // pollable event may become broken when a laptop
 | ||||
|                 // switches between wired and wireless networks or
 | ||||
|                 // wakes up from hibernation.  We try to create a
 | ||||
|                 // new pollable event.  If that fails, we fall back
 | ||||
|                 // on "busy wait".
 | ||||
|                 NS_WARNING("Trying to repair mPollableEvent"); | ||||
|                 mPollableEvent.reset(new PollableEvent()); | ||||
|                 if (!mPollableEvent->Valid()) { | ||||
|                     mPollableEvent = nullptr; | ||||
|                 } | ||||
|                 SOCKET_LOG(("running socket transport thread without " | ||||
|                             "a pollable event now valid=%d", !!mPollableEvent)); | ||||
|                 mPollList[0].fd = mPollableEvent ? mPollableEvent->PollableFD() : nullptr; | ||||
|                 mPollList[0].in_flags = PR_POLL_READ | PR_POLL_EXCEPT; | ||||
|                 mPollList[0].out_flags = 0; | ||||
|                 TryRepairPollableEvent(); | ||||
|             } | ||||
| 
 | ||||
|             if (mPollableEvent && | ||||
|                 !mPollableEvent->IsSignallingAlive(mPollableEventTimeout)) { | ||||
|                 SOCKET_LOG(("Pollable event signalling failed/timed out")); | ||||
|                 TryRepairPollableEvent(); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | @ -1335,6 +1349,14 @@ nsSocketTransportService::UpdatePrefs() | |||
|         if (NS_SUCCEEDED(rv) && maxTimeForPrClosePref >=0) { | ||||
|             mMaxTimeForPrClosePref = PR_MillisecondsToInterval(maxTimeForPrClosePref); | ||||
|         } | ||||
| 
 | ||||
|         int32_t pollableEventTimeout; | ||||
|         rv = tmpPrefService->GetIntPref(POLLABLE_EVENT_TIMEOUT, | ||||
|                                         &pollableEventTimeout); | ||||
|         if (NS_SUCCEEDED(rv) && pollableEventTimeout >= 0) { | ||||
|             MutexAutoLock lock(mLock); | ||||
|             mPollableEventTimeout = TimeDuration::FromSeconds(pollableEventTimeout); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return NS_OK; | ||||
|  | @ -1695,7 +1717,24 @@ nsSocketTransportService::EndPolling() | |||
|         mPollRepairTimer->Cancel(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| void nsSocketTransportService::TryRepairPollableEvent() | ||||
| { | ||||
|     mLock.AssertCurrentThreadOwns(); | ||||
| 
 | ||||
|     NS_WARNING("Trying to repair mPollableEvent"); | ||||
|     mPollableEvent.reset(new PollableEvent()); | ||||
|     if (!mPollableEvent->Valid()) { | ||||
|         mPollableEvent = nullptr; | ||||
|     } | ||||
|     SOCKET_LOG(("running socket transport thread without " | ||||
|                 "a pollable event now valid=%d", !!mPollableEvent)); | ||||
|     mPollList[0].fd = mPollableEvent ? mPollableEvent->PollableFD() : nullptr; | ||||
|     mPollList[0].in_flags = PR_POLL_READ | PR_POLL_EXCEPT; | ||||
|     mPollList[0].out_flags = 0; | ||||
| } | ||||
| 
 | ||||
| } // namespace net
 | ||||
| } // namespace mozilla
 | ||||
|  |  | |||
|  | @ -247,6 +247,8 @@ private: | |||
|     int32_t     mKeepaliveProbeCount; | ||||
|     // True if TCP keepalive is enabled globally.
 | ||||
|     bool        mKeepaliveEnabledPref; | ||||
|     // Timeout of pollable event signalling.
 | ||||
|     TimeDuration mPollableEventTimeout; | ||||
| 
 | ||||
|     Atomic<bool>                    mServingPendingQueue; | ||||
|     Atomic<int32_t, Relaxed>        mMaxTimePerPollIter; | ||||
|  | @ -285,6 +287,8 @@ private: | |||
|     void StartPolling(); | ||||
|     void EndPolling(); | ||||
| #endif | ||||
| 
 | ||||
|     void TryRepairPollableEvent(); | ||||
| }; | ||||
| 
 | ||||
| extern nsSocketTransportService *gSocketTransportService; | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Honza Bambas
						Honza Bambas