forked from mirrors/gecko-dev
		
	 69df6a7000
			
		
	
	
		69df6a7000
		
	
	
	
	
		
			
			(No test case, because testing unsuited for CI both in terms of space and time.) Differential Revision: https://phabricator.services.mozilla.com/D143591
		
			
				
	
	
		
			444 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			444 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| #ifndef nsHtml5Highlighter_h
 | |
| #define nsHtml5Highlighter_h
 | |
| 
 | |
| #include "nsCOMPtr.h"
 | |
| #include "nsHtml5TreeOperation.h"
 | |
| #include "nsHtml5UTF16Buffer.h"
 | |
| #include "nsHtml5TreeOperation.h"
 | |
| #include "nsAHtml5TreeOpSink.h"
 | |
| 
 | |
| #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
 | |
| 
 | |
| /**
 | |
|  * A state machine for generating HTML for display in View Source based on
 | |
|  * the transitions the tokenizer makes on the source being viewed.
 | |
|  */
 | |
| class nsHtml5Highlighter {
 | |
|  public:
 | |
|   /**
 | |
|    * The constructor.
 | |
|    *
 | |
|    * @param aOpSink the sink for the tree ops generated by this highlighter
 | |
|    */
 | |
|   explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
 | |
| 
 | |
|   /**
 | |
|    * The destructor.
 | |
|    */
 | |
|   ~nsHtml5Highlighter();
 | |
| 
 | |
|   /**
 | |
|    * Set the op sink (for speculation).
 | |
|    *
 | |
|    * @param aOpSink the new sink for the tree ops generated by this
 | |
|    *        highlighter
 | |
|    */
 | |
|   void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
 | |
| 
 | |
|   /**
 | |
|    * Reset state to after generated head but before processing any of the input
 | |
|    * stream.
 | |
|    */
 | |
|   void Rewind();
 | |
| 
 | |
|   /**
 | |
|    * Starts the generated document.
 | |
|    *
 | |
|    * @param aTitle NOTE(review): presumably the title of the generated
 | |
|    *        document - confirm against the implementation
 | |
|    */
 | |
|   void Start(const nsAutoString& aTitle);
 | |
| 
 | |
|   /**
 | |
|    * Updates the charset source via the op queue.
 | |
|    *
 | |
|    * @param aCharsetSource the new charset source
 | |
|    */
 | |
|   void UpdateCharsetSource(nsCharsetSource aCharsetSource);
 | |
| 
 | |
|   /**
 | |
|    * Report a tokenizer state transition.
 | |
|    *
 | |
|    * @param aState the state being transitioned to
 | |
|    * @param aReconsume whether this is a reconsuming transition
 | |
|    * @param aPos the tokenizer's current position into the buffer
 | |
|    * @return NOTE(review): the meaning of the returned value is not visible
 | |
|    *         in this header; presumably a tokenizer state - confirm against
 | |
|    *         the implementation
 | |
|    */
 | |
|   int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
 | |
| 
 | |
|   /**
 | |
|    * Report end of file.
 | |
|    *
 | |
|    * Returns `true` normally and `false` on OOM.
 | |
|    */
 | |
|   [[nodiscard]] bool End();
 | |
| 
 | |
|   /**
 | |
|    * Set the current buffer being tokenized.
 | |
|    *
 | |
|    * @param aBuffer the buffer being tokenized
 | |
|    */
 | |
|   void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
 | |
| 
 | |
|   /**
 | |
|    * Let go of the buffer being tokenized but first, flush text from it.
 | |
|    *
 | |
|    * @param aPos the first UTF-16 code unit not to flush
 | |
|    */
 | |
|   void DropBuffer(int32_t aPos);
 | |
| 
 | |
|   /**
 | |
|    * Query whether there are so many ops in the queue
 | |
|    * that they should be flushed now.
 | |
|    *
 | |
|    * @return true if FlushOps() should be called now
 | |
|    */
 | |
|   bool ShouldFlushOps();
 | |
| 
 | |
|   /**
 | |
|    * Flush the tree ops into the sink.
 | |
|    *
 | |
|    * @return Ok(true) if there were ops to flush, Ok(false)
 | |
|    *         if there were no ops to flush and Err() on OOM.
 | |
|    */
 | |
|   mozilla::Result<bool, nsresult> FlushOps();
 | |
| 
 | |
|   /**
 | |
|    * Linkify the current attribute value if the attribute name is one of
 | |
|    * known URL attributes. (When executing tree ops, javascript: URLs will
 | |
|    * not be linkified, though.)
 | |
|    *
 | |
|    * @param aName the name of the attribute
 | |
|    * @param aValue the value of the attribute
 | |
|    */
 | |
|   void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
 | |
|                                   nsHtml5String aValue);
 | |
| 
 | |
|   /**
 | |
|    * Inform the highlighter that the tokenizer successfully completed a
 | |
|    * named character reference.
 | |
|    */
 | |
|   void CompletedNamedCharacterReference();
 | |
| 
 | |
|   /**
 | |
|    * Adds an error annotation to the node that's currently on top of
 | |
|    * mStack.
 | |
|    *
 | |
|    * @param aMsgId the id of the message in the property file
 | |
|    */
 | |
|   void AddErrorToCurrentNode(const char* aMsgId);
 | |
| 
 | |
|   /**
 | |
|    * Adds an error annotation to the node that corresponds to the most
 | |
|    * recently opened markup declaration/tag span, character reference or
 | |
|    * run of text.
 | |
|    *
 | |
|    * @param aMsgId the id of the message in the property file
 | |
|    */
 | |
|   void AddErrorToCurrentRun(const char* aMsgId);
 | |
| 
 | |
|   /**
 | |
|    * Adds an error annotation to the node that corresponds to the most
 | |
|    * recently opened markup declaration/tag span, character reference or
 | |
|    * run of text with one atom to use when formatting the message.
 | |
|    *
 | |
|    * @param aMsgId the id of the message in the property file
 | |
|    * @param aName the atom
 | |
|    */
 | |
|   void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
 | |
| 
 | |
|   /**
 | |
|    * Adds an error annotation to the node that corresponds to the most
 | |
|    * recently opened markup declaration/tag span, character reference or
 | |
|    * run of text with two atoms to use when formatting the message.
 | |
|    *
 | |
|    * @param aMsgId the id of the message in the property file
 | |
|    * @param aName the first atom
 | |
|    * @param aOther the second atom
 | |
|    */
 | |
|   void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
 | |
| 
 | |
|   /**
 | |
|    * Adds an error annotation to the node that corresponds to the most
 | |
|    * recent potentially character reference-starting ampersand.
 | |
|    *
 | |
|    * @param aMsgId the id of the message in the property file
 | |
|    */
 | |
|   void AddErrorToCurrentAmpersand(const char* aMsgId);
 | |
| 
 | |
|   /**
 | |
|    * Adds an error annotation to the node that corresponds to the most
 | |
|    * recent potentially self-closing slash.
 | |
|    *
 | |
|    * @param aMsgId the id of the message in the property file
 | |
|    */
 | |
|   void AddErrorToCurrentSlash(const char* aMsgId);
 | |
| 
 | |
|   /**
 | |
|    * Enqueues a tree op for adding a base to the URLs that use the
 | |
|    * view-source: URL scheme.
 | |
|    *
 | |
|    * @param aValue the base URL to add
 | |
|    */
 | |
|   void AddBase(nsHtml5String aValue);
 | |
| 
 | |
|   /**
 | |
|    * Starts a wrapper around a run of characters.
 | |
|    */
 | |
|   void StartCharacters();
 | |
| 
 | |
|  private:
 | |
|   /**
 | |
|    * Starts a span with no class.
 | |
|    */
 | |
|   void StartSpan();
 | |
| 
 | |
|   /**
 | |
|    * Starts a <span> and sets the class attribute on it.
 | |
|    *
 | |
|    * @param aClass the class to set (MUST be a static string that does not
 | |
|    *        need to be released!)
 | |
|    */
 | |
|   void StartSpan(const char16_t* aClass);
 | |
| 
 | |
|   /**
 | |
|    * End the current <span> or <a> in the highlighter output.
 | |
|    */
 | |
|   void EndSpanOrA();
 | |
| 
 | |
|   /**
 | |
|    * Ends a wrapper around a run of characters.
 | |
|    *
 | |
|    * NOTE(review): the method name suggests that this also starts a markup
 | |
|    * run - confirm against the implementation.
 | |
|    */
 | |
|   void EndCharactersAndStartMarkupRun();
 | |
| 
 | |
|   /**
 | |
|    * Starts an <a>.
 | |
|    */
 | |
|   void StartA();
 | |
| 
 | |
|   /**
 | |
|    * Flushes characters up to but not including the current one.
 | |
|    */
 | |
|   void FlushChars();
 | |
| 
 | |
|   /**
 | |
|    * Flushes characters up to and including the current one.
 | |
|    */
 | |
|   void FlushCurrent();
 | |
| 
 | |
|   /**
 | |
|    * Finishes highlighting a tag in the input data by closing the open
 | |
|    * <span> and <a> elements in the highlighter output and then starts
 | |
|    * another <span> for potentially highlighting characters potentially
 | |
|    * appearing next.
 | |
|    */
 | |
|   void FinishTag();
 | |
| 
 | |
|   /**
 | |
|    * Adds a class attribute to the current node.
 | |
|    *
 | |
|    * @param aClass the class to set (MUST be a static string that does not
 | |
|    *        need to be released!)
 | |
|    */
 | |
|   void AddClass(const char16_t* aClass);
 | |
| 
 | |
|   /**
 | |
|    * Allocates a handle for an element.
 | |
|    *
 | |
|    * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
 | |
|    * in nsHtml5TreeBuilderHSupplement.h.
 | |
|    *
 | |
|    * @return the handle
 | |
|    */
 | |
|   nsIContent** AllocateContentHandle();
 | |
| 
 | |
|   /**
 | |
|    * Enqueues an element creation tree operation.
 | |
|    *
 | |
|    * @param aName the name of the element
 | |
|    * @param aAttributes the attribute holder (ownership will be taken) or
 | |
|    *        nullptr for no attributes
 | |
|    * @param aIntendedParent the intended parent node for the created element
 | |
|    * @param aCreator the content creator function
 | |
|    * @return the handle for the element that will be created
 | |
|    */
 | |
|   nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
 | |
|                              nsIContent** aIntendedParent,
 | |
|                              mozilla::dom::HTMLContentCreatorFunction aCreator);
 | |
| 
 | |
|   /**
 | |
|    * Gets the handle for the current node. May be called only after the
 | |
|    * root element has been set.
 | |
|    *
 | |
|    * @return the handle for the current node
 | |
|    */
 | |
|   nsIContent** CurrentNode();
 | |
| 
 | |
|   /**
 | |
|    * Create an element and push it (its handle) on the stack.
 | |
|    *
 | |
|    * @param aName the name of the element
 | |
|    * @param aAttributes the attribute holder (ownership will be taken) or
 | |
|    *        nullptr for no attributes
 | |
|    * @param aCreator the content creator function
 | |
|    */
 | |
|   void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
 | |
|             mozilla::dom::HTMLContentCreatorFunction aCreator);
 | |
| 
 | |
|   /**
 | |
|    * Pops the current node off the stack.
 | |
|    */
 | |
|   void Pop();
 | |
| 
 | |
|   /**
 | |
|    * Appends text content to the current node.
 | |
|    *
 | |
|    * @param aBuffer the buffer to copy from
 | |
|    * @param aStart the index of the first code unit to copy
 | |
|    * @param aLength the number of code units to copy
 | |
|    */
 | |
|   void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
 | |
|                         int32_t aLength);
 | |
| 
 | |
|   /**
 | |
|    * Enqueues a tree op for adding an href attribute with the view-source:
 | |
|    * URL scheme to the current node.
 | |
|    *
 | |
|    * @param aValue the (potentially relative) URL to link to
 | |
|    */
 | |
|   void AddViewSourceHref(nsHtml5String aValue);
 | |
| 
 | |
|   /**
 | |
|    * The state we are transitioning away from.
 | |
|    */
 | |
|   int32_t mState;
 | |
| 
 | |
|   /**
 | |
|    * The index of the first UTF-16 code unit in mBuffer that hasn't been
 | |
|    * flushed yet.
 | |
|    */
 | |
|   int32_t mCStart;
 | |
| 
 | |
|   /**
 | |
|    * The position of the code unit in mBuffer that caused the current
 | |
|    * transition.
 | |
|    */
 | |
|   int32_t mPos;
 | |
| 
 | |
|   /**
 | |
|    * The current line number.
 | |
|    */
 | |
|   int32_t mLineNumber;
 | |
| 
 | |
|   /**
 | |
|    * The number of inline elements open inside the <pre> excluding the
 | |
|    * span potentially wrapping a run of characters.
 | |
|    */
 | |
|   int32_t mInlinesOpen;
 | |
| 
 | |
|   /**
 | |
|    * Whether there's a span wrapping a run of characters (excluding CDATA
 | |
|    * section) open.
 | |
|    */
 | |
|   bool mInCharacters;
 | |
| 
 | |
|   /**
 | |
|    * The current buffer being tokenized.
 | |
|    */
 | |
|   nsHtml5UTF16Buffer* mBuffer;
 | |
| 
 | |
|   /**
 | |
|    * The outgoing tree op queue.
 | |
|    */
 | |
|   nsTArray<nsHtml5TreeOperation> mOpQueue;
 | |
| 
 | |
|   /**
 | |
|    * The tree op stage for the tree op executor or a speculation when looking
 | |
|    * for meta charset.
 | |
|    *
 | |
|    * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
 | |
|    * object, because this object is owned by the nsHtml5Tokenizer instance that
 | |
|    * is owned by the nsHtml5StreamParser, which keeps the executor alive via
 | |
|    * nsHtml5StreamParser::mExecutorFlusher.
 | |
|    */
 | |
|   nsAHtml5TreeOpSink* mOpSink;
 | |
| 
 | |
|   /**
 | |
|    * The most recently opened markup declaration/tag or run of characters.
 | |
|    */
 | |
|   nsIContent** mCurrentRun;
 | |
| 
 | |
|   /**
 | |
|    * The most recent ampersand in a place where character references were
 | |
|    * allowed.
 | |
|    */
 | |
|   nsIContent** mAmpersand;
 | |
| 
 | |
|   /**
 | |
|    * The most recent slash that might become a self-closing slash.
 | |
|    */
 | |
|   nsIContent** mSlash;
 | |
| 
 | |
|   /**
 | |
|    * Memory for element handles.
 | |
|    *
 | |
|    * NOTE(review): presumably sized by
 | |
|    * NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH - confirm against the
 | |
|    * implementation.
 | |
|    */
 | |
|   mozilla::UniquePtr<nsIContent*[]> mHandles;
 | |
| 
 | |
|   /**
 | |
|    * Number of handles used in mHandles.
 | |
|    */
 | |
|   int32_t mHandlesUsed;
 | |
| 
 | |
|   /**
 | |
|    * A holder for old contents of mHandles.
 | |
|    */
 | |
|   nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
 | |
| 
 | |
|   /**
 | |
|    * The element stack.
 | |
|    */
 | |
|   nsTArray<nsIContent**> mStack;
 | |
| 
 | |
|   /**
 | |
|    * The string "comment"
 | |
|    */
 | |
|   static char16_t sComment[];
 | |
| 
 | |
|   /**
 | |
|    * The string "cdata"
 | |
|    */
 | |
|   static char16_t sCdata[];
 | |
| 
 | |
|   /**
 | |
|    * The string "start-tag"
 | |
|    */
 | |
|   static char16_t sStartTag[];
 | |
| 
 | |
|   /**
 | |
|    * The string "attribute-name"
 | |
|    */
 | |
|   static char16_t sAttributeName[];
 | |
| 
 | |
|   /**
 | |
|    * The string "attribute-value"
 | |
|    */
 | |
|   static char16_t sAttributeValue[];
 | |
| 
 | |
|   /**
 | |
|    * The string "end-tag"
 | |
|    */
 | |
|   static char16_t sEndTag[];
 | |
| 
 | |
|   /**
 | |
|    * The string "doctype"
 | |
|    */
 | |
|   static char16_t sDoctype[];
 | |
| 
 | |
|   /**
 | |
|    * The string "entity"
 | |
|    */
 | |
|   static char16_t sEntity[];
 | |
| 
 | |
|   /**
 | |
|    * The string "pi"
 | |
|    */
 | |
|   static char16_t sPi[];
 | |
| 
 | |
|   /**
 | |
|    * Whether base has already been visited once.
 | |
|    */
 | |
|   bool mSeenBase;
 | |
| };
 | |
| 
 | |
| #endif  // nsHtml5Highlighter_h
 |