forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			584 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			584 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | |
| /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| #ifndef Tokenizer_h__
 | |
| #define Tokenizer_h__
 | |
| 
 | |
| #include <type_traits>
 | |
| 
 | |
| #include "nsString.h"
 | |
| #include "mozilla/CheckedInt.h"
 | |
| #include "mozilla/ScopeExit.h"
 | |
| #include "mozilla/UniquePtr.h"
 | |
| #include "nsTArray.h"
 | |
| 
 | |
| namespace mozilla {
 | |
| 
 | |
| template <typename TChar>
 | |
| class TokenizerBase {
 | |
|  public:
 | |
|   typedef nsTSubstring<TChar> TAString;
 | |
|   typedef nsTString<TChar> TString;
 | |
|   typedef nsTDependentString<TChar> TDependentString;
 | |
|   typedef nsTDependentSubstring<TChar> TDependentSubstring;
 | |
| 
 | |
|   static TChar const sWhitespaces[];
 | |
| 
 | |
|   /**
 | |
|    * The analyzer works with elements in the input cut to a sequence of tokens
 | |
|    * where each token has an elementary type
 | |
|    */
 | |
|   enum TokenType : uint32_t {
 | |
|     TOKEN_UNKNOWN,
 | |
|     TOKEN_RAW,
 | |
|     TOKEN_ERROR,
 | |
|     TOKEN_INTEGER,
 | |
|     TOKEN_WORD,
 | |
|     TOKEN_CHAR,
 | |
|     TOKEN_WS,
 | |
|     TOKEN_EOL,
 | |
|     TOKEN_EOF,
 | |
|     TOKEN_CUSTOM0 = 1000
 | |
|   };
 | |
| 
 | |
|   enum ECaseSensitivity { CASE_SENSITIVE, CASE_INSENSITIVE };
 | |
| 
 | |
|   /**
 | |
|    * Class holding the type and the value of a token.  It can be manually
 | |
|    * created to allow checks against it via methods of TTokenizer or are results
 | |
|    * of some of the TTokenizer's methods.
 | |
|    */
 | |
|   class Token {
 | |
|     TokenType mType;
 | |
|     TDependentSubstring mWord;
 | |
|     TString mCustom;
 | |
|     TChar mChar;
 | |
|     uint64_t mInteger;
 | |
|     ECaseSensitivity mCustomCaseInsensitivity;
 | |
|     bool mCustomEnabled;
 | |
| 
 | |
|     // If this token is a result of the parsing process, this member is
 | |
|     // referencing a sub-string in the input buffer.  If this is externally
 | |
|     // created Token this member is left an empty string.
 | |
|     TDependentSubstring mFragment;
 | |
| 
 | |
|     friend class TokenizerBase<TChar>;
 | |
|     void AssignFragment(typename TAString::const_char_iterator begin,
 | |
|                         typename TAString::const_char_iterator end);
 | |
| 
 | |
|     static Token Raw();
 | |
| 
 | |
|    public:
 | |
|     Token();
 | |
|     Token(const Token& aOther);
 | |
|     Token& operator=(const Token& aOther);
 | |
| 
 | |
|     // Static constructors of tokens by type and value
 | |
|     static Token Word(TAString const& aWord);
 | |
|     static Token Char(TChar const aChar);
 | |
|     static Token Number(uint64_t const aNumber);
 | |
|     static Token Whitespace();
 | |
|     static Token NewLine();
 | |
|     static Token EndOfFile();
 | |
|     static Token Error();
 | |
| 
 | |
|     // Compares the two tokens, type must be identical and value
 | |
|     // of one of the tokens must be 'any' or equal.
 | |
|     bool Equals(const Token& aOther) const;
 | |
| 
 | |
|     TokenType Type() const { return mType; }
 | |
|     TChar AsChar() const;
 | |
|     TDependentSubstring AsString() const;
 | |
|     uint64_t AsInteger() const;
 | |
| 
 | |
|     TDependentSubstring Fragment() const { return mFragment; }
 | |
|   };
 | |
| 
 | |
|   /**
 | |
|    * Consumers may register a custom string that, when found in the input, is
 | |
|    * considered a token and returned by Next*() and accepted by Check*()
 | |
|    * methods. AddCustomToken() returns a reference to a token that can then be
 | |
|    * compared using Token::Equals() against the output from Next*() or be passed
 | |
|    * to Check*().
 | |
|    */
 | |
|   Token AddCustomToken(const TAString& aValue,
 | |
|                        ECaseSensitivity aCaseInsensitivity,
 | |
|                        bool aEnabled = true);
 | |
|   template <uint32_t N>
 | |
|   Token AddCustomToken(const TChar (&aValue)[N],
 | |
|                        ECaseSensitivity aCaseInsensitivity,
 | |
|                        bool aEnabled = true) {
 | |
|     return AddCustomToken(TDependentSubstring(aValue, N - 1),
 | |
|                           aCaseInsensitivity, aEnabled);
 | |
|   }
 | |
|   void RemoveCustomToken(Token& aToken);
 | |
|   /**
 | |
|    * Only applies to a custom type of a Token (see AddCustomToken above.)
 | |
|    * This turns on and off token recognition.  When a custom token is disabled,
 | |
|    * it's ignored as never added as a custom token.
 | |
|    */
 | |
|   void EnableCustomToken(Token const& aToken, bool aEnable);
 | |
| 
 | |
|   /**
 | |
|    * Mode of tokenization.
 | |
|    * FULL tokenization, the default, recognizes built-in tokens and any custom
 | |
|    * tokens, if added. CUSTOM_ONLY will only recognize custom tokens, the rest
 | |
|    * is seen as 'raw'. This mode can be understood as a 'binary' mode.
 | |
|    */
 | |
|   enum class Mode { FULL, CUSTOM_ONLY };
 | |
|   void SetTokenizingMode(Mode aMode);
 | |
| 
 | |
|   /**
 | |
|    * Return true iff the last Check*() call has returned false or when we've
 | |
|    * read past the end of the input string.
 | |
|    */
 | |
|   [[nodiscard]] bool HasFailed() const;
 | |
| 
 | |
|  protected:
 | |
|   explicit TokenizerBase(const TChar* aWhitespaces = nullptr,
 | |
|                          const TChar* aAdditionalWordChars = nullptr);
 | |
| 
 | |
|   // false if we have already read the EOF token.
 | |
|   bool HasInput() const;
 | |
|   // Main parsing function, it doesn't shift the read cursor, just returns the
 | |
|   // next token position.
 | |
|   typename TAString::const_char_iterator Parse(Token& aToken) const;
 | |
|   // Is read cursor at the end?
 | |
|   bool IsEnd(const typename TAString::const_char_iterator& caret) const;
 | |
|   // True, when we are at the end of the input data, but it has not been marked
 | |
|   // as complete yet.  In that case we cannot proceed with providing a
 | |
|   // multi-TChar token.
 | |
|   bool IsPending(const typename TAString::const_char_iterator& caret) const;
 | |
|   // Is read cursor on a character that is a word start?
 | |
|   bool IsWordFirst(const TChar aInput) const;
 | |
|   // Is read cursor on a character that is an in-word letter?
 | |
|   bool IsWord(const TChar aInput) const;
 | |
|   // Is read cursor on a character that is a valid number?
 | |
|   // TODO - support multiple radix
 | |
|   bool IsNumber(const TChar aInput) const;
 | |
|   // Is equal to the given custom token?
 | |
|   bool IsCustom(const typename TAString::const_char_iterator& caret,
 | |
|                 const Token& aCustomToken, uint32_t* aLongest = nullptr) const;
 | |
| 
 | |
|   // Friendly helper to assign a fragment on a Token
 | |
|   static void AssignFragment(Token& aToken,
 | |
|                              typename TAString::const_char_iterator begin,
 | |
|                              typename TAString::const_char_iterator end);
 | |
| 
 | |
| #ifdef DEBUG
 | |
|   // This is called from inside Tokenizer methods to make sure the token is
 | |
|   // valid.
 | |
|   void Validate(Token const& aToken);
 | |
| #endif
 | |
| 
 | |
|   // true iff we have already read the EOF token
 | |
|   bool mPastEof;
 | |
|   // true iff the last Check*() call has returned false, reverts to false on
 | |
|   // Rollback() call
 | |
|   bool mHasFailed;
 | |
|   // true if the input string is final (finished), false when we expect more
 | |
|   // data yet to be fed to the tokenizer (see IncrementalTokenizer derived
 | |
|   // class).
 | |
|   bool mInputFinished;
 | |
|   // custom only vs full tokenizing mode, see the Parse() method
 | |
|   Mode mMode;
 | |
|   // minimal raw data chunked delivery during incremental feed
 | |
|   uint32_t mMinRawDelivery;
 | |
| 
 | |
|   // Customizable list of whitespaces
 | |
|   const TChar* mWhitespaces;
 | |
|   // Additional custom word characters
 | |
|   const TChar* mAdditionalWordChars;
 | |
| 
 | |
|   // All these point to the original buffer passed to the constructor or to the
 | |
|   // incremental buffer after FeedInput.
 | |
|   typename TAString::const_char_iterator
 | |
|       mCursor;  // Position of the current (actually next to read) token start
 | |
|   typename TAString::const_char_iterator mEnd;  // End of the input position
 | |
| 
 | |
|   // This is the list of tokens user has registered with AddCustomToken()
 | |
|   nsTArray<UniquePtr<Token>> mCustomTokens;
 | |
|   uint32_t mNextCustomTokenID;
 | |
| 
 | |
|  private:
 | |
|   TokenizerBase() = delete;
 | |
|   TokenizerBase(const TokenizerBase&) = delete;
 | |
|   TokenizerBase(TokenizerBase&&) = delete;
 | |
|   TokenizerBase(const TokenizerBase&&) = delete;
 | |
|   TokenizerBase& operator=(const TokenizerBase&) = delete;
 | |
| };
 | |
| 
 | |
| /**
 | |
|  * This is a simple implementation of a lexical analyzer or maybe better
 | |
|  * called a tokenizer.
 | |
|  *
 | |
|  * Please use Tokenizer or Tokenizer16 classes, that are specializations
 | |
|  * of this template class.  Tokenizer is for ASCII input, Tokenizer16 may
 | |
|  * handle char16_t input, but it doesn't recognize any whitespaces or numbers
 | |
|  * other than those recognized by the standard `char` specialized Tokenizer class.
 | |
|  */
 | |
| template <typename TChar>
 | |
| class TTokenizer : public TokenizerBase<TChar> {
 | |
|  public:
 | |
|   typedef TokenizerBase<TChar> base;
 | |
| 
 | |
|   /**
 | |
|    * @param aSource
 | |
|    *    The string to parse.
 | |
|    *    IMPORTANT NOTE: TTokenizer doesn't ensure the input string buffer
 | |
|    * lifetime. It's up to the consumer to make sure the string's buffer outlives
 | |
|    * the TTokenizer!
 | |
|    * @param aWhitespaces
 | |
|    *    If non-null TTokenizer will use this custom set of whitespaces for
 | |
|    * CheckWhite() and SkipWhites() calls. By default the list consists of space
 | |
|    * and tab.
 | |
|    * @param aAdditionalWordChars
 | |
|    *    If non-null it will be added to the list of characters that make up a
 | |
|    * word. This is useful when you want to accept e.g. '-' in HTTP headers. By
 | |
|    * default a word character is considered to be any character for which upper
 | |
|    *    case is different from lower case.
 | |
|    *
 | |
|    * If there is an overlap between aWhitespaces and aAdditionalWordChars, the
 | |
|    * check for word characters is made first.
 | |
|    */
 | |
|   explicit TTokenizer(const typename base::TAString& aSource,
 | |
|                       const TChar* aWhitespaces = nullptr,
 | |
|                       const TChar* aAdditionalWordChars = nullptr);
 | |
|   explicit TTokenizer(const TChar* aSource, const TChar* aWhitespaces = nullptr,
 | |
|                       const TChar* aAdditionalWordChars = nullptr);
 | |
| 
 | |
|   /**
 | |
|    * When there is still anything to read from the input, tokenize it, store the
 | |
|    * token type and value to aToken result and shift the cursor past this just
 | |
|    * parsed token.  Each call to Next() reads another token from the input and
 | |
|    * shifts the cursor. Returns false if we have passed the end of the input.
 | |
|    */
 | |
|   [[nodiscard]] bool Next(typename base::Token& aToken);
 | |
| 
 | |
|   /**
 | |
|    * Parse the token on the input read cursor position, check its type is equal
 | |
|    * to aTokenType and if so, put it into aResult, shift the cursor and return
 | |
|    * true.  Otherwise, leave the input read cursor position intact and return
 | |
|    * false.
 | |
|    */
 | |
|   [[nodiscard]] bool Check(const typename base::TokenType aTokenType,
 | |
|                            typename base::Token& aResult);
 | |
|   /**
 | |
|    * Same as above method, just compares both token type and token value passed
 | |
|    * in aToken. When both the type and the value equals, shift the cursor and
 | |
|    * return true.  Otherwise return false.
 | |
|    */
 | |
|   [[nodiscard]] bool Check(const typename base::Token& aToken);
 | |
| 
 | |
|   /**
 | |
|    * SkipWhites method (below) may also skip new line characters automatically.
 | |
|    */
 | |
|   enum WhiteSkipping {
 | |
|     /**
 | |
|      * SkipWhites will only skip what is defined as a white space (default).
 | |
|      */
 | |
|     DONT_INCLUDE_NEW_LINE = 0,
 | |
|     /**
 | |
|      * SkipWhites will skip defined white spaces as well as new lines
 | |
|      * automatically.
 | |
|      */
 | |
|     INCLUDE_NEW_LINE = 1
 | |
|   };
 | |
| 
 | |
|   /**
 | |
|    * Skips any occurrence of whitespaces specified in mWhitespaces member,
 | |
|    * optionally skip also new lines.
 | |
|    */
 | |
|   void SkipWhites(WhiteSkipping aIncludeNewLines = DONT_INCLUDE_NEW_LINE);
 | |
| 
 | |
|   /**
 | |
|    * Skips all tokens until the given one is found or EOF is hit.  The token
 | |
|    * or EOF are next to read.
 | |
|    */
 | |
|   void SkipUntil(typename base::Token const& aToken);
 | |
| 
 | |
|   // These are mostly shortcuts for the Check() methods above.
 | |
| 
 | |
|   /**
 | |
|    * Check whitespace character is present.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckWhite() { return Check(base::Token::Whitespace()); }
 | |
|   /**
 | |
|    * Check there is a single character on the read cursor position.  If so,
 | |
|    * shift the read cursor position and return true.  Otherwise false.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckChar(const TChar aChar) {
 | |
|     return Check(base::Token::Char(aChar));
 | |
|   }
 | |
|   /**
 | |
|    * This is a customizable version of CheckChar.  aClassifier is a function
 | |
|    * called with value of the character on the current input read position.  If
 | |
|    * this user function returns true, read cursor is shifted and true returned.
 | |
|    * Otherwise false. The user classification function is not called when we are
 | |
|    * at or past the end and false is immediately returned.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckChar(bool (*aClassifier)(const TChar aChar));
 | |
|   /**
 | |
|    * Check for a whole expected word.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckWord(const typename base::TAString& aWord) {
 | |
|     return Check(base::Token::Word(aWord));
 | |
|   }
 | |
|   /**
 | |
|    * Shortcut for literal const word check with compile time length calculation.
 | |
|    */
 | |
|   template <uint32_t N>
 | |
|   [[nodiscard]] bool CheckWord(const TChar (&aWord)[N]) {
 | |
|     return Check(
 | |
|         base::Token::Word(typename base::TDependentString(aWord, N - 1)));
 | |
|   }
 | |
|   /**
 | |
|    * Helper to check for a string compound of multiple tokens like "foo bar".
 | |
|    * The match is binary-exact, a white space or a delimiter character in the
 | |
|    * phrase must match exactly the characters in the input.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckPhrase(const typename base::TAString& aPhrase);
 | |
|   template <uint32_t N>
 | |
|   [[nodiscard]] bool CheckPhrase(const TChar (&aPhrase)[N]) {
 | |
|     return CheckPhrase(typename base::TDependentString(aPhrase, N - 1));
 | |
|   }
 | |
|   /**
 | |
|    * Checks \r, \n or \r\n.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckEOL() { return Check(base::Token::NewLine()); }
 | |
|   /**
 | |
|    * Checks we are at the end of the input string reading.  If so, shift past
 | |
|    * the end and returns true.  Otherwise does nothing and returns false.
 | |
|    */
 | |
|   [[nodiscard]] bool CheckEOF() { return Check(base::Token::EndOfFile()); }
 | |
| 
 | |
|   /**
 | |
|    * These are shortcuts to obtain the value immediately when the token type
 | |
|    * matches.
 | |
|    */
 | |
|   [[nodiscard]] bool ReadChar(TChar* aValue);
 | |
|   [[nodiscard]] bool ReadChar(bool (*aClassifier)(const TChar aChar),
 | |
|                               TChar* aValue);
 | |
|   [[nodiscard]] bool ReadWord(typename base::TAString& aValue);
 | |
|   [[nodiscard]] bool ReadWord(typename base::TDependentSubstring& aValue);
 | |
| 
 | |
|   /**
 | |
|    * This is an integer read helper.  It returns false and doesn't move the read
 | |
|    * cursor when any of the following happens:
 | |
|    *  - the token at the read cursor is not an integer
 | |
|    *  - the final number doesn't fit the T type
 | |
|    * Otherwise true is returned, aValue is filled with the integral number
 | |
|    * and the cursor is moved forward.
 | |
|    */
 | |
|   template <typename T>
 | |
|   [[nodiscard]] bool ReadInteger(T* aValue) {
 | |
|     MOZ_RELEASE_ASSERT(aValue);
 | |
| 
 | |
|     typename base::TAString::const_char_iterator rollback = mRollback;
 | |
|     typename base::TAString::const_char_iterator cursor = base::mCursor;
 | |
|     typename base::Token t;
 | |
|     if (!Check(base::TOKEN_INTEGER, t)) {
 | |
|       return false;
 | |
|     }
 | |
| 
 | |
|     mozilla::CheckedInt<T> checked(t.AsInteger());
 | |
|     if (!checked.isValid()) {
 | |
|       // Move to a state as if Check() call has failed
 | |
|       mRollback = rollback;
 | |
|       base::mCursor = cursor;
 | |
|       base::mHasFailed = true;
 | |
|       return false;
 | |
|     }
 | |
| 
 | |
|     *aValue = checked.value();
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Same as above, but accepts an integer with an optional minus sign.
 | |
|    */
 | |
|   template <typename T, typename V = std::enable_if_t<
 | |
|                             std::is_signed_v<std::remove_pointer_t<T>>,
 | |
|                             std::remove_pointer_t<T>>>
 | |
|   [[nodiscard]] bool ReadSignedInteger(T* aValue) {
 | |
|     MOZ_RELEASE_ASSERT(aValue);
 | |
| 
 | |
|     typename base::TAString::const_char_iterator rollback = mRollback;
 | |
|     typename base::TAString::const_char_iterator cursor = base::mCursor;
 | |
|     auto revert = MakeScopeExit([&] {
 | |
|       // Move to a state as if Check() call has failed
 | |
|       mRollback = rollback;
 | |
|       base::mCursor = cursor;
 | |
|       base::mHasFailed = true;
 | |
|     });
 | |
| 
 | |
|     // Using functional raw access because '-' could be part of the word set
 | |
|     // making CheckChar('-') not work.
 | |
|     bool minus = CheckChar([](const TChar aChar) { return aChar == '-'; });
 | |
| 
 | |
|     typename base::Token t;
 | |
|     if (!Check(base::TOKEN_INTEGER, t)) {
 | |
|       return false;
 | |
|     }
 | |
| 
 | |
|     mozilla::CheckedInt<T> checked(t.AsInteger());
 | |
|     if (minus) {
 | |
|       checked *= -1;
 | |
|     }
 | |
| 
 | |
|     if (!checked.isValid()) {
 | |
|       return false;
 | |
|     }
 | |
| 
 | |
|     *aValue = checked.value();
 | |
|     revert.release();
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * This is an hexadecimal read helper.  It returns false and doesn't move the
 | |
|    * read cursor when any of the following happens:
 | |
|    *  - the token at the read cursor is not 0, and it's not followed by x
 | |
|    *  - the token(s) that follow don't make a valid hexadecimal number
 | |
|    *  - the final number doesn't fit the T type
 | |
|    * Otherwise true is returned, aValue is filled with the integral number
 | |
|    * and the cursor is moved forward.
 | |
|    */
 | |
|   template <typename T>
 | |
|   [[nodiscard]] bool ReadHexadecimal(T* aValue, bool aPrefixed = true) {
 | |
|     MOZ_RELEASE_ASSERT(aValue);
 | |
| 
 | |
|     typename base::TAString::const_char_iterator rollback = mRollback;
 | |
|     typename base::TAString::const_char_iterator cursor = base::mCursor;
 | |
|     auto revert = MakeScopeExit([&] {
 | |
|       // Move to a state as if Check() call has failed
 | |
|       mRollback = rollback;
 | |
|       base::mCursor = cursor;
 | |
|       base::mHasFailed = true;
 | |
|     });
 | |
| 
 | |
|     if (aPrefixed) {
 | |
|       typename base::Token t;
 | |
|       if (!Check(base::TOKEN_INTEGER, t) && t.AsInteger() != 0) {
 | |
|         return false;
 | |
|       }
 | |
| 
 | |
|       if (!CheckChar([](const TChar aChar) { return aChar == 'x'; })) {
 | |
|         return false;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     TChar c = 'z';
 | |
|     mozilla::CheckedInt<T> resultingNumber = 0;
 | |
|     while (ReadChar(
 | |
|         [](const TChar aChar) {
 | |
|           return (aChar >= '0' && aChar <= '9') ||
 | |
|                  (aChar >= 'A' && aChar <= 'F') ||
 | |
|                  (aChar >= 'a' && aChar <= 'f');
 | |
|         },
 | |
|         &c)) {
 | |
|       resultingNumber *= 16;
 | |
|       if (c <= '9') {
 | |
|         resultingNumber += static_cast<uint64_t>(c - '0');
 | |
|       } else if (c <= 'F') {
 | |
|         resultingNumber += static_cast<uint64_t>(c - 'A') + 0xa;
 | |
|       } else {
 | |
|         resultingNumber += static_cast<uint64_t>(c - 'a') + 0xa;
 | |
|       }
 | |
|     }
 | |
|     if (c == 'z' || !resultingNumber.isValid()) {
 | |
|       return false;
 | |
|     }
 | |
| 
 | |
|     *aValue = resultingNumber.value();
 | |
|     revert.release();
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Returns the read cursor position back as it was before the last call of any
 | |
|    * parsing method of TTokenizer (Next, Check*, Skip*, Read*) so that the last
 | |
|    * operation can be repeated. Rollback cannot be used multiple times, it only
 | |
|    * reverts the last successful parse operation.  It also cannot be used
 | |
|    * before any parsing operation has been called on the TTokenizer.
 | |
|    */
 | |
|   void Rollback();
 | |
| 
 | |
|   /**
 | |
|    * Record() and Claim() are collecting the input as it is being parsed to
 | |
|    * obtain a substring between particular syntax boundaries defined by any
 | |
|    * recursive descent parser or simple parser the TTokenizer is used to read
 | |
|    * the input for. Inclusion of a token that has just been parsed can be
 | |
|    * controlled using an argument.
 | |
|    */
 | |
|   enum ClaimInclusion {
 | |
|     /**
 | |
|      * Include resulting (or passed) token of the last lexical analyzer
 | |
|      * operation in the result.
 | |
|      */
 | |
|     INCLUDE_LAST,
 | |
|     /**
 | |
|      * Do not include it.
 | |
|      */
 | |
|     EXCLUDE_LAST
 | |
|   };
 | |
| 
 | |
|   /**
 | |
|    * Start the process of recording.  Based on aInclude value the beginning of
 | |
|    * the recorded sub-string is at the current position (EXCLUDE_LAST) or at the
 | |
|    * position before the last parsed token (INCLUDE_LAST).
 | |
|    */
 | |
|   void Record(ClaimInclusion aInclude = EXCLUDE_LAST);
 | |
|   /**
 | |
|    * Claim result of the record started with Record() call before.  Depending on
 | |
|    * aInclude the ending of the sub-string result includes or excludes the last
 | |
|    * parsed or checked token.
 | |
|    */
 | |
|   void Claim(typename base::TAString& aResult,
 | |
|              ClaimInclusion aInclude = EXCLUDE_LAST);
 | |
|   void Claim(typename base::TDependentSubstring& aResult,
 | |
|              ClaimInclusion aInclude = EXCLUDE_LAST);
 | |
| 
 | |
|   /**
 | |
|    * If aToken is found, aResult is set to the substring between the current
 | |
|    * position and the position of aToken, potentially including aToken depending
 | |
|    * on aInclude.
 | |
|    * If aToken isn't found aResult is set to the substring between the current
 | |
|    * position and the end of the string.
 | |
|    * If aToken is found, the method returns true. Otherwise it returns false.
 | |
|    *
 | |
|    * Calling Rollback() after ReadUntil() will return the read cursor to the
 | |
|    * position it had before ReadUntil was called.
 | |
|    */
 | |
|   [[nodiscard]] bool ReadUntil(typename base::Token const& aToken,
 | |
|                                typename base::TDependentSubstring& aResult,
 | |
|                                ClaimInclusion aInclude = EXCLUDE_LAST);
 | |
|   [[nodiscard]] bool ReadUntil(typename base::Token const& aToken,
 | |
|                                typename base::TAString& aResult,
 | |
|                                ClaimInclusion aInclude = EXCLUDE_LAST);
 | |
| 
 | |
|  protected:
 | |
|   // All these point to the original buffer passed to the TTokenizer's
 | |
|   // constructor
 | |
|   typename base::TAString::const_char_iterator
 | |
|       mRecord;  // Position where the recorded sub-string for Claim() is
 | |
|   typename base::TAString::const_char_iterator
 | |
|       mRollback;  // Position of the previous token start
 | |
| 
 | |
|  private:
 | |
|   TTokenizer() = delete;
 | |
|   TTokenizer(const TTokenizer&) = delete;
 | |
|   TTokenizer(TTokenizer&&) = delete;
 | |
|   TTokenizer(const TTokenizer&&) = delete;
 | |
|   TTokenizer& operator=(const TTokenizer&) = delete;
 | |
| };
 | |
| 
 | |
| typedef TTokenizer<char> Tokenizer;
 | |
| typedef TTokenizer<char16_t> Tokenizer16;
 | |
| 
 | |
| }  // namespace mozilla
 | |
| 
 | |
| #endif  // Tokenizer_h__
 | 
