forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			766 lines
		
	
	
	
		
			20 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			766 lines
		
	
	
	
		
			20 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| #ifndef nsHtml5StreamParser_h
 | |
| #define nsHtml5StreamParser_h
 | |
| 
 | |
| #include <tuple>
 | |
| 
 | |
| #include "MainThreadUtils.h"
 | |
| #include "mozilla/AlreadyAddRefed.h"
 | |
| #include "mozilla/Assertions.h"
 | |
| #include "mozilla/Encoding.h"
 | |
| #include "mozilla/Mutex.h"
 | |
| #include "mozilla/NotNull.h"
 | |
| #include "mozilla/RefPtr.h"
 | |
| #include "mozilla/Span.h"
 | |
| #include "mozilla/UniquePtr.h"
 | |
| #include "nsCharsetSource.h"
 | |
| #include "nsCOMPtr.h"
 | |
| #include "nsCycleCollectionParticipant.h"
 | |
| #include "nsDebug.h"
 | |
| #include "nsHtml5AtomTable.h"
 | |
| #include "nsIRequestObserver.h"
 | |
| #include "nsISerialEventTarget.h"
 | |
| #include "nsISupports.h"
 | |
| #include "nsStringFwd.h"
 | |
| #include "nsTArray.h"
 | |
| #include "nscore.h"
 | |
| 
 | |
| class nsCycleCollectionTraversalCallback;
 | |
| class nsHtml5OwningUTF16Buffer;
 | |
| class nsHtml5Parser;
 | |
| class nsHtml5Speculation;
 | |
| class nsHtml5String;
 | |
| class nsHtml5Tokenizer;
 | |
| class nsHtml5TreeBuilder;
 | |
| class nsHtml5TreeOpExecutor;
 | |
| class nsIChannel;
 | |
| class nsIInputStream;
 | |
| class nsIRequest;
 | |
| class nsIRunnable;
 | |
| class nsITimer;
 | |
| class nsIURI;
 | |
| 
 | |
| namespace mozilla {
 | |
| class EncodingDetector;
 | |
| template <typename T>
 | |
| class Buffer;
 | |
| 
 | |
| namespace dom {
 | |
| class DocGroup;
 | |
| }
 | |
| }  // namespace mozilla
 | |
| 
 | |
| enum eParserMode {  // Parse mode handed to the nsHtml5StreamParser constructor.
 | |
|   /**
 | |
|    * Parse a document normally as HTML.
 | |
|    */
 | |
|   NORMAL,
 | |
| 
 | |
|   /**
 | |
|    * View document as HTML source.
 | |
|    */
 | |
|   VIEW_SOURCE_HTML,
 | |
| 
 | |
|   /**
 | |
|    * View document as XML source.
 | |
|    */
 | |
|   VIEW_SOURCE_XML,
 | |
| 
 | |
|   /**
 | |
|    * View document as plain text source.
 | |
|    */
 | |
|   VIEW_SOURCE_PLAIN,
 | |
| 
 | |
|   /**
 | |
|    * View document as plain text.
 | |
|    */
 | |
|   PLAIN_TEXT,
 | |
| 
 | |
|   /**
 | |
|    * Load as data (XHR).
 | |
|    */
 | |
|   LOAD_AS_DATA
 | |
| };
 | |
| 
 | |
| enum eBomState {  // State of the BOM / UTF-16 XML-declaration sniffer.
 | |
|   /**
 | |
|    * BOM sniffing hasn't started.
 | |
|    */
 | |
|   BOM_SNIFFING_NOT_STARTED,
 | |
| 
 | |
|   /**
 | |
|    * BOM sniffing is ongoing, and the first byte of a UTF-16LE BOM has been
 | |
|    * seen.
 | |
|    */
 | |
|   SEEN_UTF_16_LE_FIRST_BYTE,
 | |
| 
 | |
|   /**
 | |
|    * BOM sniffing is ongoing, and the first byte of a UTF-16BE BOM has been
 | |
|    * seen.
 | |
|    */
 | |
|   SEEN_UTF_16_BE_FIRST_BYTE,
 | |
| 
 | |
|   /**
 | |
|    * BOM sniffing is ongoing, and the first byte of a UTF-8 BOM has been
 | |
|    * seen.
 | |
|    */
 | |
|   SEEN_UTF_8_FIRST_BYTE,
 | |
| 
 | |
|   /**
 | |
|    * BOM sniffing is ongoing, and the first and second bytes of a UTF-8 BOM
 | |
|    * have been seen.
 | |
|    */
 | |
|   SEEN_UTF_8_SECOND_BYTE,
 | |
| 
 | |
|   /**
 | |
|    * Seen \x00 in UTF-16BE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_BE_XML_FIRST,
 | |
| 
 | |
|   /**
 | |
|    * Seen \x00< in UTF-16BE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_BE_XML_SECOND,
 | |
| 
 | |
|   /**
 | |
|    * Seen \x00<\x00 in UTF-16BE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_BE_XML_THIRD,
 | |
| 
 | |
|   /**
 | |
|    * Seen \x00<\x00? in UTF-16BE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_BE_XML_FOURTH,
 | |
| 
 | |
|   /**
 | |
|    * Seen \x00<\x00?\x00 in UTF-16BE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_BE_XML_FIFTH,
 | |
| 
 | |
|   /**
 | |
|    * Seen < in UTF-16LE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_LE_XML_FIRST,
 | |
| 
 | |
|   /**
 | |
|    * Seen <\x00 in UTF-16LE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_LE_XML_SECOND,
 | |
| 
 | |
|   /**
 | |
|    * Seen <\x00? in UTF-16LE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_LE_XML_THIRD,
 | |
| 
 | |
|   /**
 | |
|    * Seen <\x00?\x00 in UTF-16LE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_LE_XML_FOURTH,
 | |
| 
 | |
|   /**
 | |
|    * Seen <\x00?\x00x in UTF-16LE bogo-XML declaration.
 | |
|    */
 | |
|   SEEN_UTF_16_LE_XML_FIFTH,
 | |
| 
 | |
|   /**
 | |
|    * BOM sniffing was started but is now over for whatever reason.
 | |
|    */
 | |
|   BOM_SNIFFING_OVER,
 | |
| };
 | |
| 
 | |
| enum eHtml5StreamState {  // Stream life-cycle states.
 | |
|   STREAM_NOT_STARTED = 0,  // SetDocumentCharset() asserts this state.
 | |
|   STREAM_BEING_READ = 1,   // presumably between start and stop request; TODO confirm
 | |
|   STREAM_ENDED = 2         // presumably after the stop request; TODO confirm
 | |
| };
 | |
| 
 | |
| class nsHtml5StreamParser final : public nsISupports {
 | |
|   template <typename T>
 | |
|   using NotNull = mozilla::NotNull<T>;
 | |
|   using Encoding = mozilla::Encoding;
 | |
| 
 | |
|   // Numeric limits used by the parser implementation. They are compile-time
 | |
|   // values shared by every instance, so they are static constexpr rather than
 | |
|   // per-object const data members (which also cost storage in each object).
 | |
|   static constexpr uint32_t UNCONDITIONAL_META_SCAN_BOUNDARY = 1024;
 | |
|   static constexpr uint32_t READ_BUFFER_SIZE = 1024;
 | |
|   static constexpr uint32_t LOCAL_FILE_UTF_8_BUFFER_SIZE =
 | |
|       1024 * 1024 * 4;  // 4 MiB
 | |
| 
 | |
|   friend class nsHtml5RequestStopper;
 | |
|   friend class nsHtml5DataAvailable;
 | |
|   friend class nsHtml5StreamParserContinuation;
 | |
|   friend class nsHtml5TimerKungFu;
 | |
|   friend class nsHtml5StreamParserPtr;
 | |
|   friend class nsHtml5StreamListener;
 | |
| 
 | |
|  public:
 | |
|   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
 | |
|   NS_DECL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser)
 | |
| 
 | |
|   nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, nsHtml5Parser* aOwner,
 | |
|                       eParserMode aMode);
 | |
| 
 | |
|   nsresult OnStartRequest(nsIRequest* aRequest);
 | |
| 
 | |
|   nsresult OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInStream,
 | |
|                            uint64_t aSourceOffset, uint32_t aLength);
 | |
| 
 | |
|   nsresult OnStopRequest(nsIRequest* aRequest, nsresult status);
 | |
| 
 | |
|   // EncodingDeclarationHandler
 | |
|   // https://hg.mozilla.org/projects/htmlparser/file/tip/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java
 | |
|   /**
 | |
|    * Tree builder uses this to report a late <meta charset>
 | |
|    */
 | |
|   bool internalEncodingDeclaration(nsHtml5String aEncoding);
 | |
| 
 | |
|   bool TemplatePushedOrHeadPopped();
 | |
| 
 | |
|   void RememberGt(int32_t aPos);
 | |
| 
 | |
|   // Not from an external interface
 | |
| 
 | |
|   /**
 | |
|    * Post a runnable to the main thread to perform the speculative load
 | |
|    * operations without performing the tree operations.
 | |
|    *
 | |
|    * This should be called at the end of each data available or stop
 | |
|    * request runnable running on the parser thread.
 | |
|    */
 | |
|   void PostLoadFlusher();
 | |
| 
 | |
|   /**
 | |
|    * Pass a buffer to chardetng.
 | |
|    */
 | |
|   void FeedDetector(mozilla::Span<const uint8_t> aBuffer);
 | |
| 
 | |
|   /**
 | |
|    * Report EOF to chardetng.
 | |
|    */
 | |
|   void DetectorEof();
 | |
| 
 | |
|   /**
 | |
|    * Tells a freshly created parser which encoding to start with.
 | |
|    *
 | |
|    * Asserts (debug builds) that the stream has not started, that the call
 | |
|    * is made on the main thread, and that forced auto-detection is not
 | |
|    * combined with a source ranked kCharsetFromOtherComponent or higher.
 | |
|    *
 | |
|    * @param aEncoding            the encoding of the document
 | |
|    * @param aSource              the source (confidence) of that encoding
 | |
|    * @param aForceAutoDetection  stored in mForceAutoDetection ("whether the
 | |
|    *                             Repair Text Encoding menu item was invoked")
 | |
|    */
 | |
|   inline void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
 | |
|                                  nsCharsetSource aSource,
 | |
|                                  bool aForceAutoDetection) {
 | |
|     MOZ_ASSERT(mStreamState == STREAM_NOT_STARTED,
 | |
|                "SetDocumentCharset called too late.");
 | |
|     MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
 | |
|     MOZ_ASSERT(!(aForceAutoDetection && aSource >= kCharsetFromOtherComponent),
 | |
|                "Can't force with high-ranking source.");
 | |
| 
 | |
|     // Remember whether the encoding came from the channel.
 | |
|     const bool cameFromChannel = (aSource == kCharsetFromChannel);
 | |
|     mChannelHadCharset = cameFromChannel;
 | |
|     mForceAutoDetection = aForceAutoDetection;
 | |
|     mCharsetSource = aSource;
 | |
|     mEncoding = aEncoding;
 | |
|   }
 | |
| 
 | |
|   nsresult GetChannel(nsIChannel** aChannel);
 | |
| 
 | |
|   /**
 | |
|    * The owner parser must call this after script execution
 | |
|    * when no scripts are executing and the document.written
 | |
|    * buffer has been exhausted.
 | |
|    *
 | |
|    * If the first two arguments are nullptr, instead of
 | |
|    * continuing after scripts, this method commits to an
 | |
|    * internally-discovered encoding.
 | |
|    */
 | |
|   void ContinueAfterScriptsOrEncodingCommitment(
 | |
|       nsHtml5Tokenizer* aTokenizer, nsHtml5TreeBuilder* aTreeBuilder,
 | |
|       bool aLastWasCR);
 | |
| 
 | |
|   /**
 | |
|    * Continues the stream parser if the charset switch failed.
 | |
|    */
 | |
|   void ContinueAfterFailedCharsetSwitch();
 | |
| 
 | |
|   void Terminate() { mTerminated = true; }
 | |
| 
 | |
|   void DropTimer();
 | |
| 
 | |
|   /**
 | |
|    * Sets the URL for View Source title in case this parser ends up being
 | |
|    * used for View Source. If aURL is a view-source: URL, takes the inner
 | |
|    * URL. data: URLs are shown with an ellipsis instead of the actual data.
 | |
|    */
 | |
|   void SetViewSourceTitle(nsIURI* aURL);
 | |
| 
 | |
|  private:
 | |
|   virtual ~nsHtml5StreamParser();
 | |
| 
 | |
| #ifdef DEBUG
 | |
|   bool IsParserThread() { return mEventTarget->IsOnCurrentThread(); }
 | |
| #endif
 | |
| 
 | |
|   void MarkAsBroken(nsresult aRv);
 | |
| 
 | |
|   /**
 | |
|    * Marks the stream parser as interrupted. If you ever add calls to this
 | |
|    * method, be sure to review Uninterrupt usage very, very carefully to
 | |
|    * avoid having a previous in-flight runnable cancel your Interrupt()
 | |
|    * call on the other thread too soon.
 | |
|    */
 | |
|   void Interrupt() {
 | |
|     MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
 | |
|     mInterrupted = true;
 | |
|   }
 | |
| 
 | |
|   void Uninterrupt() MOZ_NO_THREAD_SAFETY_ANALYSIS {
 | |
|     MOZ_ASSERT(IsParserThread(), "Wrong thread!");
 | |
|     mTokenizerMutex.AssertCurrentThreadOwns();
 | |
|     mInterrupted = false;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Flushes the tree ops from the tree builder and disarms the flush
 | |
|    * timer.
 | |
|    */
 | |
|   void FlushTreeOpsAndDisarmTimer();
 | |
| 
 | |
|   /**
 | |
|    * Callers must hold mTokenizerMutex (declared via the MOZ_REQUIRES
 | |
|    * thread-safety annotation).
 | |
|    *
 | |
|    * NOTE(review): presumably switches the decoder to aEncoding only if all
 | |
|    * bytes seen so far were ASCII; confirm against the implementation.
 | |
|    */
 | |
|   void SwitchDecoderIfAsciiSoFar(NotNull<const Encoding*> aEncoding)
 | |
|       MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   size_t CountGts();
 | |
| 
 | |
|   void DiscardMetaSpeculation();
 | |
| 
 | |
|   bool ProcessLookingForMetaCharset(bool aEof) MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   void ParseAvailableData();
 | |
| 
 | |
|   void DoStopRequest();
 | |
| 
 | |
|   void DoDataAvailableBuffer(mozilla::Buffer<uint8_t>&& aBuffer)
 | |
|       MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   void DoDataAvailable(mozilla::Span<const uint8_t> aBuffer)
 | |
|       MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   static nsresult CopySegmentsToParser(nsIInputStream* aInStream,
 | |
|                                        void* aClosure, const char* aFromSegment,
 | |
|                                        uint32_t aToOffset, uint32_t aCount,
 | |
|                                        uint32_t* aWriteCount)
 | |
|       MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   /**
 | |
|    * True if either the mTerminated or the mInterrupted flag is set.
 | |
|    * Read-only, hence const.
 | |
|    */
 | |
|   bool IsTerminatedOrInterrupted() const { return mTerminated || mInterrupted; }
 | |
| 
 | |
|   /**
 | |
|    * Returns the current value of mTerminated (set by Terminate()).
 | |
|    * Read-only, hence const.
 | |
|    */
 | |
|   bool IsTerminated() const { return mTerminated; }
 | |
| 
 | |
|   /**
 | |
|    * True when there is a Unicode decoder already, i.e. mUnicodeDecoder is
 | |
|    * non-null. Read-only, hence const.
 | |
|    */
 | |
|   inline bool HasDecoder() const { return !!mUnicodeDecoder; }
 | |
| 
 | |
|   /**
 | |
|    * Returns 0 if 1) there aren't at least 2 buffers in mBufferedBytes
 | |
|    * or 2) there is no byte '>' in the second buffer.
 | |
|    * Otherwise, returns the length of the prefix of the second buffer
 | |
|    * that is long enough to contain the first byte '>' in the second
 | |
|    * buffer (including the '>' byte).
 | |
|    */
 | |
|   size_t LengthOfLtContainingPrefixInSecondBuffer();
 | |
| 
 | |
|   /**
 | |
|    * Push bytes from network when there is no Unicode decoder yet
 | |
|    */
 | |
|   nsresult SniffStreamBytes(mozilla::Span<const uint8_t> aFromSegment,
 | |
|                             bool aEof) MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   /**
 | |
|    * Push bytes from network when there is a Unicode decoder already
 | |
|    */
 | |
|   nsresult WriteStreamBytes(mozilla::Span<const uint8_t> aFromSegment)
 | |
|       MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   /**
 | |
|    * Set up the Unicode decoder and write the sniffing buffer into it
 | |
|    * followed by the current network buffer.
 | |
|    *
 | |
|    * @param aPrefix the part of the stream that has already been seen
 | |
|    *                prior to aFromSegment. In practice, these are the
 | |
|    *                bytes that are baked into the state of the BOM
 | |
|    *                and UTF-16 XML declaration-like sniffing state
 | |
|    *                machine state.
 | |
|    * @param aFromSegment The current network buffer
 | |
|    */
 | |
|   nsresult SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
 | |
|       mozilla::Span<const uint8_t> aPrefix,
 | |
|       mozilla::Span<const uint8_t> aFromSegment) MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   /**
 | |
|    * Initialize the Unicode decoder, mark the BOM as the source and
 | |
|    * drop the sniffer.
 | |
|    *
 | |
|    * @param aEncoding The encoding to set the decoder up for
 | |
|    *                  (UTF-16BE, UTF-16LE or UTF-8; the BOM has
 | |
|    *                  been swallowed)
 | |
|    */
 | |
|   void SetupDecodingFromBom(NotNull<const Encoding*> aEncoding);
 | |
| 
 | |
|   void SetupDecodingFromUtf16BogoXml(NotNull<const Encoding*> aEncoding);
 | |
| 
 | |
|   /**
 | |
|    * When speculatively decoding from file: URL as UTF-8, commit
 | |
|    * to UTF-8 as the non-speculative encoding and start processing
 | |
|    * the decoded data.
 | |
|    */
 | |
|   [[nodiscard]] nsresult CommitLocalFileToEncoding();
 | |
| 
 | |
|   /**
 | |
|    * When speculatively decoding from file: URL as UTF-8, redecode
 | |
|    * using fallback and then continue normally with the fallback.
 | |
|    */
 | |
|   [[nodiscard]] nsresult ReDecodeLocalFile() MOZ_REQUIRES(mTokenizerMutex);
 | |
| 
 | |
|   /**
 | |
|    * Potentially guess the encoding using mozilla::EncodingDetector.
 | |
|    * Returns the guessed encoding and a telemetry-appropriate source.
 | |
|    */
 | |
|   std::tuple<NotNull<const Encoding*>, nsCharsetSource> GuessEncoding(
 | |
|       bool aInitial);
 | |
| 
 | |
|   /**
 | |
|    * Become confident or resolve an encoding name to its preferred form.
 | |
|    *
 | |
|    * @param aEncoding the value of an internal encoding declaration. It is
 | |
|    *                  taken by const reference, so it is not an out param.
 | |
|    * @return NOTE(review): the result is a `const Encoding*`. Presumably
 | |
|    *         non-null means the parser needs to start using that encoding
 | |
|    *         and nullptr means the parser became confident or the name did
 | |
|    *         not specify a usable encoding; confirm against the
 | |
|    *         implementation.
 | |
|    */
 | |
|   const Encoding* PreferredForInternalEncodingDecl(const nsAString& aEncoding);
 | |
| 
 | |
|   /**
 | |
|    * Callback for mFlushTimer.
 | |
|    */
 | |
|   static void TimerCallback(nsITimer* aTimer, void* aClosure);
 | |
| 
 | |
|   /**
 | |
|    * Parser thread entry point for (maybe) flushing the ops and posting
 | |
|    * a flush runnable back on the main thread.
 | |
|    */
 | |
|   void TimerFlush();
 | |
| 
 | |
|   /**
 | |
|    * Called when speculation fails.
 | |
|    */
 | |
|   void MaybeDisableFutureSpeculation() { mSpeculationFailureCount++; }
 | |
| 
 | |
|   /**
 | |
|    * Used to check whether we're getting too many speculation failures and
 | |
|    * should just stop trying.  The 100 is picked pretty randomly to be not too
 | |
|    * small (so most pages are not affected) but small enough that we don't end
 | |
|    * up with failed speculations over and over in pathological cases.
 | |
|    *
 | |
|    * Only reads mSpeculationFailureCount, hence const.
 | |
|    */
 | |
|   bool IsSpeculationEnabled() const { return mSpeculationFailureCount < 100; }
 | |
| 
 | |
|   /**
 | |
|    * Dispatch an event to a Quantum DOM main thread-ish thread.
 | |
|    * (Not the parser thread.)
 | |
|    */
 | |
|   nsresult DispatchToMain(already_AddRefed<nsIRunnable>&& aRunnable);
 | |
| 
 | |
|   /**
 | |
|    * Notify any devtools listeners about content newly received for parsing.
 | |
|    */
 | |
|   inline void OnNewContent(mozilla::Span<const char16_t> aData);
 | |
| 
 | |
|   /**
 | |
|    * Notify any devtools listeners after all parse content has been received.
 | |
|    */
 | |
|   inline void OnContentComplete();
 | |
| 
 | |
|   nsCOMPtr<nsIRequest> mRequest;
 | |
| 
 | |
|   /**
 | |
|    * The document title to use if this turns out to be a View Source parser.
 | |
|    */
 | |
|   nsCString mViewSourceTitle;
 | |
| 
 | |
|   /**
 | |
|    * The Unicode decoder
 | |
|    */
 | |
|   mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;
 | |
| 
 | |
|   /**
 | |
|    * BOM sniffing state
 | |
|    */
 | |
|   eBomState mBomState;
 | |
| 
 | |
|   // encoding-related stuff
 | |
|   /**
 | |
|    * The source (confidence) of the character encoding in use
 | |
|    */
 | |
|   nsCharsetSource mCharsetSource;
 | |
| 
 | |
|   nsCharsetSource mEncodingSwitchSource;
 | |
| 
 | |
|   /**
 | |
|    * The character encoding in use
 | |
|    */
 | |
|   NotNull<const Encoding*> mEncoding;
 | |
| 
 | |
|   const Encoding* mNeedsEncodingSwitchTo;
 | |
| 
 | |
|   bool mSeenEligibleMetaCharset;
 | |
| 
 | |
|   bool mChardetEof;
 | |
| 
 | |
| #ifdef DEBUG
 | |
| 
 | |
|   bool mStartedFeedingDetector;
 | |
| 
 | |
|   bool mStartedFeedingDevTools;
 | |
| 
 | |
| #endif
 | |
| 
 | |
|   /**
 | |
|    * Whether reparse is forbidden
 | |
|    */
 | |
|   bool mReparseForbidden;
 | |
| 
 | |
|   /**
 | |
|    * Whether the Repair Text Encoding menu item was invoked
 | |
|    */
 | |
|   bool mForceAutoDetection;
 | |
| 
 | |
|   /**
 | |
|    * Whether there was a valid charset parameter on the HTTP layer.
 | |
|    */
 | |
|   bool mChannelHadCharset;
 | |
| 
 | |
|   /**
 | |
|    * We are in the process of looking for <meta charset>
 | |
|    */
 | |
|   bool mLookingForMetaCharset;
 | |
| 
 | |
|   /**
 | |
|    * Whether the byte stream started with ASCII <?
 | |
|    */
 | |
|   bool mStartsWithLtQuestion;
 | |
| 
 | |
|   /**
 | |
|    * If we are viewing XML source and are waiting for a '>' from the network.
 | |
|    */
 | |
|   bool mLookingForXmlDeclarationForXmlViewSource;
 | |
| 
 | |
|   /**
 | |
|    * Whether template has been pushed or head popped within the first 1024
 | |
|    * bytes.
 | |
|    */
 | |
|   bool mTemplatePushedOrHeadPopped;
 | |
| 
 | |
|   // Portable parser objects
 | |
|   /**
 | |
|    * The first buffer in the pending UTF-16 buffer queue
 | |
|    */
 | |
|   RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer;
 | |
| 
 | |
|   /**
 | |
|    * Non-owning pointer to the most recent buffer that contains the most recent
 | |
|    * remembered greater-than sign. Used only while mLookingForMetaCharset is
 | |
|    * true. While mLookingForMetaCharset is true, mFirstBuffer is not changed and
 | |
|    * keeps the whole linked list of buffers alive. This pointer is non-owning to
 | |
|    * avoid frequent refcounting.
 | |
|    */
 | |
|   nsHtml5OwningUTF16Buffer* mGtBuffer;
 | |
| 
 | |
|   int32_t mGtPos;
 | |
| 
 | |
|   /**
 | |
|    * The last buffer in the pending UTF-16 buffer queue
 | |
|    */
 | |
|   nsHtml5OwningUTF16Buffer*
 | |
|       mLastBuffer;  // weak ref; always points to
 | |
|                     // a buffer of the size
 | |
|                     // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
 | |
| 
 | |
|   /**
 | |
|    * The first buffer of the document if looking for <meta charset> or
 | |
|    * nullptr afterwards.
 | |
|    */
 | |
|   RefPtr<nsHtml5OwningUTF16Buffer> mFirstBufferOfMetaScan;
 | |
| 
 | |
|   /**
 | |
|    * The tree operation executor
 | |
|    */
 | |
|   nsHtml5TreeOpExecutor* mExecutor;
 | |
| 
 | |
|   /**
 | |
|    * Network event target for mExecutor->mDocument
 | |
|    */
 | |
|   nsCOMPtr<nsISerialEventTarget> mNetworkEventTarget;
 | |
| 
 | |
|   /**
 | |
|    * The HTML5 tree builder
 | |
|    */
 | |
|   mozilla::UniquePtr<nsHtml5TreeBuilder> mTreeBuilder;
 | |
| 
 | |
|   /**
 | |
|    * The HTML5 tokenizer
 | |
|    */
 | |
|   mozilla::UniquePtr<nsHtml5Tokenizer> mTokenizer;
 | |
| 
 | |
|   /**
 | |
|    * Makes sure the main thread can't mess the tokenizer state while it's
 | |
|    * tokenizing. This mutex also protects the current speculation.
 | |
|    */
 | |
|   mozilla::Mutex mTokenizerMutex;
 | |
| 
 | |
|   /**
 | |
|    * The scoped atom table
 | |
|    */
 | |
|   nsHtml5AtomTable mAtomTable;
 | |
| 
 | |
|   /**
 | |
|    * The owner parser.
 | |
|    */
 | |
|   RefPtr<nsHtml5Parser> mOwner;
 | |
| 
 | |
|   /**
 | |
|    * Whether the last character tokenized was a carriage return (for CRLF)
 | |
|    */
 | |
|   bool mLastWasCR;
 | |
| 
 | |
|   /**
 | |
|    * For tracking stream life cycle
 | |
|    */
 | |
|   eHtml5StreamState mStreamState;
 | |
| 
 | |
|   /**
 | |
|    * Whether we are speculating.
 | |
|    */
 | |
|   bool mSpeculating;
 | |
| 
 | |
|   /**
 | |
|    * Whether the tokenizer has reached EOF. (Reset when stream rewound.)
 | |
|    */
 | |
|   bool mAtEOF;
 | |
| 
 | |
|   /**
 | |
|    * The speculations. The mutex protects the nsTArray itself.
 | |
|    * To access the queue of current speculation, mTokenizerMutex must be
 | |
|    * obtained.
 | |
|    * The current speculation is the last element
 | |
|    */
 | |
|   nsTArray<mozilla::UniquePtr<nsHtml5Speculation>> mSpeculations;
 | |
|   mozilla::Mutex mSpeculationMutex;
 | |
| 
 | |
|   /**
 | |
|    * Number of times speculation has failed for this parser.
 | |
|    */
 | |
|   mozilla::Atomic<uint32_t> mSpeculationFailureCount;
 | |
| 
 | |
|   /**
 | |
|    * Number of bytes already buffered into mBufferedBytes.
 | |
|    */
 | |
|   uint32_t mNumBytesBuffered;
 | |
| 
 | |
|   nsTArray<mozilla::Buffer<uint8_t>> mBufferedBytes;
 | |
| 
 | |
|   /**
 | |
|    * True to terminate early.
 | |
|    */
 | |
|   mozilla::Atomic<bool> mTerminated;
 | |
| 
 | |
|   /**
 | |
|    * True to release mTokenizerMutex early.
 | |
|    */
 | |
|   mozilla::Atomic<bool> mInterrupted;
 | |
| 
 | |
|   /**
 | |
|    * The thread this stream parser runs on.
 | |
|    */
 | |
|   nsCOMPtr<nsISerialEventTarget> mEventTarget;
 | |
| 
 | |
|   nsCOMPtr<nsIRunnable> mExecutorFlusher;
 | |
| 
 | |
|   nsCOMPtr<nsIRunnable> mLoadFlusher;
 | |
| 
 | |
|   /**
 | |
|    * This runnable is distinct from the regular flushers to
 | |
|    * signal the intent of encoding commitment without having to
 | |
|    * protect mPendingEncodingCommitment in the executor with a
 | |
|    * mutex.
 | |
|    */
 | |
|   nsCOMPtr<nsIRunnable> mEncodingCommitter;
 | |
| 
 | |
|   /**
 | |
|    * The generic detector.
 | |
|    */
 | |
|   mozilla::UniquePtr<mozilla::EncodingDetector> mDetector;
 | |
| 
 | |
|   /**
 | |
|    * The TLD we're loading from or empty if unknown.
 | |
|    */
 | |
|   nsCString mTLD;
 | |
| 
 | |
|   /**
 | |
|    * Whether the initial charset source was kCharsetFromParentFrame
 | |
|    */
 | |
|   bool mInitialEncodingWasFromParentFrame;
 | |
| 
 | |
|   bool mHasHadErrors;
 | |
| 
 | |
|   bool mDetectorHasSeenNonAscii;
 | |
| 
 | |
|   /**
 | |
|    * If true, we are decoding a local file that lacks an encoding
 | |
|    * declaration and we are not tokenizing yet.
 | |
|    */
 | |
|   bool mDecodingLocalFileWithoutTokenizing;
 | |
| 
 | |
|   /**
 | |
|    * Whether we are keeping the incoming bytes.
 | |
|    */
 | |
|   bool mBufferingBytes;
 | |
| 
 | |
|   /**
 | |
|    * Timer for flushing tree ops once in a while when not speculating.
 | |
|    */
 | |
|   nsCOMPtr<nsITimer> mFlushTimer;
 | |
| 
 | |
|   /**
 | |
|    * Mutex for protecting access to mFlushTimer (but not for the two
 | |
|    * mFlushTimerFoo booleans below).
 | |
|    */
 | |
|   mozilla::Mutex mFlushTimerMutex;
 | |
| 
 | |
|   /**
 | |
|    * Keeps track whether mFlushTimer has been armed. Unfortunately,
 | |
|    * nsITimer doesn't enable querying this from the timer itself.
 | |
|    */
 | |
|   bool mFlushTimerArmed;
 | |
| 
 | |
|   /**
 | |
|    * False initially and true after the timer has fired at least once.
 | |
|    */
 | |
|   bool mFlushTimerEverFired;
 | |
| 
 | |
|   /**
 | |
|    * Whether the parser is doing a normal parse, view source or plain text.
 | |
|    */
 | |
|   eParserMode mMode;
 | |
| 
 | |
|   /**
 | |
|    * If the associated docshell is being watched by the devtools, this is
 | |
|    * set to the URI associated with the parse. All parse data is sent to the
 | |
|    * devtools, along with this URI. This URI is cleared out after the parse has
 | |
|    * been marked as completed.
 | |
|    */
 | |
|   nsCOMPtr<nsIURI> mURIToSendToDevtools;
 | |
| 
 | |
|   /**
 | |
|    * If content is being sent to the devtools, an encoded UUID for the parser.
 | |
|    */
 | |
|   nsString mUUIDForDevtools;
 | |
| };
 | |
| 
 | |
| #endif  // nsHtml5StreamParser_h
 | 
