mirror of
				https://github.com/mozilla/gecko-dev.git
				synced 2025-11-04 10:18:41 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			256 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			256 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | 
						|
/* This Source Code Form is subject to the terms of the Mozilla Public
 | 
						|
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 | 
						|
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | 
						|
 | 
						|
#ifndef NS_EXPAT_DRIVER__
 | 
						|
#define NS_EXPAT_DRIVER__
 | 
						|
 | 
						|
#include "expat_config.h"
 | 
						|
#include "expat.h"
 | 
						|
#include "nsCOMPtr.h"
 | 
						|
#include "nsString.h"
 | 
						|
#include "nsIDTD.h"
 | 
						|
#include "nsIInputStream.h"
 | 
						|
#include "nsIParser.h"
 | 
						|
#include "nsCycleCollectionParticipant.h"
 | 
						|
#include "nsScanner.h"
 | 
						|
 | 
						|
#include "rlbox_expat.h"
 | 
						|
#include "nsRLBoxExpatDriver.h"
 | 
						|
#include "mozilla/UniquePtr.h"
 | 
						|
 | 
						|
class nsIExpatSink;
 | 
						|
struct nsCatalogData;
 | 
						|
class RLBoxExpatSandboxData;
 | 
						|
namespace mozilla {
 | 
						|
template <typename, size_t>
 | 
						|
class Array;
 | 
						|
}
 | 
						|
 | 
						|
class nsExpatDriver : public nsIDTD {
 | 
						|
  virtual ~nsExpatDriver();
 | 
						|
 | 
						|
 public:
 | 
						|
  NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
 | 
						|
  NS_DECL_NSIDTD
 | 
						|
  NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
 | 
						|
 | 
						|
  nsExpatDriver();
 | 
						|
 | 
						|
  nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
 | 
						|
 | 
						|
  nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
 | 
						|
 | 
						|
  int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
 | 
						|
                              const char16_t* aBase, const char16_t* aSystemId,
 | 
						|
                              const char16_t* aPublicId);
 | 
						|
  static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
 | 
						|
                                 tainted_expat<void*> aUserData,
 | 
						|
                                 tainted_expat<const char16_t*> aName,
 | 
						|
                                 tainted_expat<const char16_t**> aAtts);
 | 
						|
  static void HandleStartElementForSystemPrincipal(
 | 
						|
      rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
 | 
						|
      tainted_expat<const char16_t*> aName,
 | 
						|
      tainted_expat<const char16_t**> aAtts);
 | 
						|
  static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
 | 
						|
                               tainted_expat<void*> aUserData,
 | 
						|
                               tainted_expat<const char16_t*> aName);
 | 
						|
  static void HandleEndElementForSystemPrincipal(
 | 
						|
      rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
 | 
						|
      tainted_expat<const char16_t*> aName);
 | 
						|
  nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
 | 
						|
  nsresult HandleComment(const char16_t* aName);
 | 
						|
  nsresult HandleProcessingInstruction(const char16_t* aTarget,
 | 
						|
                                       const char16_t* aData);
 | 
						|
  nsresult HandleXMLDeclaration(const char16_t* aVersion,
 | 
						|
                                const char16_t* aEncoding, int32_t aStandalone);
 | 
						|
  nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
 | 
						|
  nsresult HandleStartCdataSection();
 | 
						|
  nsresult HandleEndCdataSection();
 | 
						|
  nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
 | 
						|
                                  const char16_t* aSysid,
 | 
						|
                                  const char16_t* aPubid,
 | 
						|
                                  bool aHasInternalSubset);
 | 
						|
  nsresult HandleEndDoctypeDecl();
 | 
						|
 | 
						|
 private:
 | 
						|
  // Load up an external stream to get external entity information
 | 
						|
  nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
 | 
						|
                                          const char16_t* aURLStr,
 | 
						|
                                          nsIURI* aBaseURI,
 | 
						|
                                          nsIInputStream** aStream,
 | 
						|
                                          nsIURI** aAbsURI);
 | 
						|
 | 
						|
  enum class ChunkOrBufferIsFinal {
 | 
						|
    None,
 | 
						|
    FinalChunk,
 | 
						|
    FinalChunkAndBuffer,
 | 
						|
  };
 | 
						|
 | 
						|
  /**
 | 
						|
   * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
 | 
						|
   * aLength should be 0. The result of the call will be stored in
 | 
						|
   * mInternalState. Expat will parse as much of the buffer as it can and store
 | 
						|
   * the rest in its internal buffer.
 | 
						|
   *
 | 
						|
   * @param aBuffer the buffer to pass to Expat. May be null.
 | 
						|
   * @param aLength the length of the buffer to pass to Expat (in number of
 | 
						|
   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
 | 
						|
   *                aBuffer is not null.
 | 
						|
   * @param aIsFinal whether this is the last chunk in a row passed to
 | 
						|
   *                 ParseChunk, and if so whether it's the last chunk and
 | 
						|
   *                 buffer passed to ParseChunk (meaning there will be no more
 | 
						|
   *                 calls to ParseChunk for the document being parsed).
 | 
						|
   * @param aConsumed [out] the number of PRUnichars that Expat consumed. This
 | 
						|
   *                        doesn't include the PRUnichars that Expat stored in
 | 
						|
   *                        its buffer but didn't parse yet.
 | 
						|
   * @param aLastLineLength [out] the length of the last line that Expat has
 | 
						|
   *                              consumed. This will only be computed if
 | 
						|
   *                              aIsFinal is not None or mInternalState is set
 | 
						|
   *                              to a failure.
 | 
						|
   */
 | 
						|
  void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
 | 
						|
                  ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
 | 
						|
                  XML_Size* aLastLineLength);
 | 
						|
  /**
 | 
						|
   * Wrapper for ParseBuffer. If the buffer is too large to be copied into the
 | 
						|
   * sandbox all at once, splits it into chunks and invokes ParseBuffer in a
 | 
						|
   * loop.
 | 
						|
   *
 | 
						|
   * @param aBuffer the buffer to pass to Expat. May be null.
 | 
						|
   * @param aLength the length of the buffer to pass to Expat (in number of
 | 
						|
   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
 | 
						|
   *                aBuffer is not null.
 | 
						|
   * @param aIsFinal whether there will definitely not be any more new buffers
 | 
						|
   *                 passed in to ParseBuffer
 | 
						|
   * @param aConsumed [out] the number of PRUnichars that Expat consumed. This
 | 
						|
   *                        doesn't include the PRUnichars that Expat stored in
 | 
						|
   *                        its buffer but didn't parse yet.
 | 
						|
   * @param aLastLineLength [out] the length of the last line that Expat has
 | 
						|
   *                              consumed.
 | 
						|
   */
 | 
						|
  void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
 | 
						|
                           bool aIsFinal, uint32_t* aPassedToExpat,
 | 
						|
                           uint32_t* aConsumed, XML_Size* aLastLineLength);
 | 
						|
 | 
						|
  nsresult HandleError();
 | 
						|
 | 
						|
  void MaybeStopParser(nsresult aState);
 | 
						|
 | 
						|
  bool BlockedOrInterrupted() {
 | 
						|
    return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
 | 
						|
           mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
 | 
						|
  }
 | 
						|
 | 
						|
  // Expat allows us to set the base URI for entities. It doesn't use the base
 | 
						|
  // URI itself, but just passes it along to all the entity handlers (just the
 | 
						|
  // external entity reference handler for us). It does expect the base URI as a
 | 
						|
  // null-terminated string, with the same character type as the parsed buffers
 | 
						|
  // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
 | 
						|
  // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
 | 
						|
  // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
 | 
						|
  // Most of the time this base URI is unused (the external entity handler is
 | 
						|
  // rarely called), but when it is we also convert it back to a nsIURI, so we
 | 
						|
  // convert the string back to UTF-8.
 | 
						|
  //
 | 
						|
  // We'd rather not do any of these conversions and copies, so we use a (hacky)
 | 
						|
  // workaround. We store all base URIs in an array of nsIURIs. Instead of
 | 
						|
  // passing the real URI to Expat as a string, we pass it a null-terminated
 | 
						|
  // 2-character buffer. The first character of that buffer stores the index of
 | 
						|
  // the corresponding nsIURI in the array (incremented with 1 because 0 is used
 | 
						|
  // to terminate a string). The entity handler can then use the index from the
 | 
						|
  // base URI that Expat passes it to look up the right nsIURI from the array.
 | 
						|
  //
 | 
						|
  // GetExpatBaseURI pushes the nsIURI to the array, and creates the
 | 
						|
  // two-character buffer for it.
 | 
						|
  //
 | 
						|
  // GetBaseURI looks up the right nsIURI in the array, based on the index from
 | 
						|
  // the two-character buffer.
 | 
						|
  using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
 | 
						|
  ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
 | 
						|
  nsIURI* GetBaseURI(const XML_Char* aBase) const;
 | 
						|
 | 
						|
  RLBoxExpatSandboxData* SandboxData() const;
 | 
						|
  rlbox_sandbox_expat* Sandbox() const;
 | 
						|
 | 
						|
  // Destroy expat parser and return sandbox to pool
 | 
						|
  void Destroy();
 | 
						|
 | 
						|
  mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
 | 
						|
  tainted_expat<XML_Parser> mExpatParser;
 | 
						|
 | 
						|
  nsString mLastLine;
 | 
						|
  nsString mCDataText;
 | 
						|
  // Various parts of a doctype
 | 
						|
  nsString mDoctypeName;
 | 
						|
  nsString mSystemID;
 | 
						|
  nsString mPublicID;
 | 
						|
  nsString mInternalSubset;
 | 
						|
  bool mInCData;
 | 
						|
  bool mInInternalSubset;
 | 
						|
  bool mInExternalDTD;
 | 
						|
  bool mMadeFinalCallToExpat;
 | 
						|
 | 
						|
  // Used to track if we're in the parser.
 | 
						|
  bool mInParser;
 | 
						|
 | 
						|
  nsresult mInternalState;
 | 
						|
 | 
						|
  // The length of the data in Expat's buffer (in number of PRUnichars).
 | 
						|
  uint32_t mExpatBuffered;
 | 
						|
 | 
						|
  uint16_t mTagDepth;
 | 
						|
 | 
						|
  // These sinks all refer the same conceptual object. mOriginalSink is
 | 
						|
  // identical with the nsIContentSink* passed to WillBuildModel, and exists
 | 
						|
  // only to avoid QI-ing back to nsIContentSink*.
 | 
						|
  nsCOMPtr<nsIContentSink> mOriginalSink;
 | 
						|
  nsCOMPtr<nsIExpatSink> mSink;
 | 
						|
 | 
						|
  const nsCatalogData* mCatalogData;  // weak
 | 
						|
  nsTArray<nsCOMPtr<nsIURI>> mURIs;
 | 
						|
 | 
						|
  // Used for error reporting.
 | 
						|
  uint64_t mInnerWindowID;
 | 
						|
};
 | 
						|
 | 
						|
class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
 | 
						|
  friend class RLBoxExpatSandboxPool;
 | 
						|
  friend class nsExpatDriver;
 | 
						|
 | 
						|
 public:
 | 
						|
  explicit RLBoxExpatSandboxData(uint64_t aSize)
 | 
						|
      : mozilla::RLBoxSandboxDataBase(aSize) {
 | 
						|
    MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
 | 
						|
  }
 | 
						|
  ~RLBoxExpatSandboxData();
 | 
						|
  rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
 | 
						|
  // After getting a sandbox from the pool we need to register the
 | 
						|
  // Handle{Start,End}Element callbacks and associate the driver with the
 | 
						|
  // sandbox.
 | 
						|
  void AttachDriver(bool IsSystemPrincipal, void* aDriver);
 | 
						|
  void DetachDriver();
 | 
						|
 | 
						|
 private:
 | 
						|
  mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
 | 
						|
  // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
 | 
						|
  // and consequently across sandbox reuses.
 | 
						|
  sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
 | 
						|
  sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
 | 
						|
  sandbox_callback_expat<XML_ProcessingInstructionHandler>
 | 
						|
      mHandleProcessingInstruction;
 | 
						|
  sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
 | 
						|
  sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
 | 
						|
  sandbox_callback_expat<XML_CommentHandler> mHandleComment;
 | 
						|
  sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
 | 
						|
  sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
 | 
						|
  sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
 | 
						|
  sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
 | 
						|
  // Expat callbacks specific to each driver, and thus (re)set across sandbox
 | 
						|
  // reuses.
 | 
						|
  sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
 | 
						|
  sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
 | 
						|
};
 | 
						|
 | 
						|
#endif
 |