forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			805 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			805 lines
		
	
	
	
		
			21 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | |
| /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| #include "Tokenizer.h"
 | |
| 
 | |
| #include "nsUnicharUtils.h"
 | |
| #include <algorithm>
 | |
| 
 | |
| namespace mozilla {
 | |
| 
 | |
| template <>
 | |
| char const TokenizerBase<char>::sWhitespaces[] = {' ', '\t', 0};
 | |
| template <>
 | |
| char16_t const TokenizerBase<char16_t>::sWhitespaces[3] = {' ', '\t', 0};
 | |
| 
 | |
| template <typename TChar>
 | |
| static bool contains(TChar const* const list, TChar const needle) {
 | |
|   for (TChar const* c = list; *c; ++c) {
 | |
|     if (needle == *c) {
 | |
|       return true;
 | |
|     }
 | |
|   }
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| TTokenizer<TChar>::TTokenizer(const typename base::TAString& aSource,
 | |
|                               const TChar* aWhitespaces,
 | |
|                               const TChar* aAdditionalWordChars)
 | |
|     : TokenizerBase<TChar>(aWhitespaces, aAdditionalWordChars) {
 | |
|   base::mInputFinished = true;
 | |
|   aSource.BeginReading(base::mCursor);
 | |
|   mRecord = mRollback = base::mCursor;
 | |
|   aSource.EndReading(base::mEnd);
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| TTokenizer<TChar>::TTokenizer(const TChar* aSource, const TChar* aWhitespaces,
 | |
|                               const TChar* aAdditionalWordChars)
 | |
|     : TTokenizer(typename base::TDependentString(aSource), aWhitespaces,
 | |
|                  aAdditionalWordChars) {}
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::Next(typename base::Token& aToken) {
 | |
|   if (!base::HasInput()) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   mRollback = base::mCursor;
 | |
|   base::mCursor = base::Parse(aToken);
 | |
| 
 | |
|   base::AssignFragment(aToken, mRollback, base::mCursor);
 | |
| 
 | |
|   base::mPastEof = aToken.Type() == base::TOKEN_EOF;
 | |
|   base::mHasFailed = false;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::Check(const typename base::TokenType aTokenType,
 | |
|                               typename base::Token& aResult) {
 | |
|   if (!base::HasInput()) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   typename base::TAString::const_char_iterator next = base::Parse(aResult);
 | |
|   if (aTokenType != aResult.Type()) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   mRollback = base::mCursor;
 | |
|   base::mCursor = next;
 | |
| 
 | |
|   base::AssignFragment(aResult, mRollback, base::mCursor);
 | |
| 
 | |
|   base::mPastEof = aResult.Type() == base::TOKEN_EOF;
 | |
|   base::mHasFailed = false;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::Check(const typename base::Token& aToken) {
 | |
| #ifdef DEBUG
 | |
|   base::Validate(aToken);
 | |
| #endif
 | |
| 
 | |
|   if (!base::HasInput()) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   typename base::Token parsed;
 | |
|   typename base::TAString::const_char_iterator next = base::Parse(parsed);
 | |
|   if (!aToken.Equals(parsed)) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   mRollback = base::mCursor;
 | |
|   base::mCursor = next;
 | |
|   base::mPastEof = parsed.Type() == base::TOKEN_EOF;
 | |
|   base::mHasFailed = false;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TTokenizer<TChar>::SkipWhites(WhiteSkipping aIncludeNewLines) {
 | |
|   if (!CheckWhite() &&
 | |
|       (aIncludeNewLines == DONT_INCLUDE_NEW_LINE || !CheckEOL())) {
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   typename base::TAString::const_char_iterator rollback = mRollback;
 | |
|   while (CheckWhite() || (aIncludeNewLines == INCLUDE_NEW_LINE && CheckEOL())) {
 | |
|   }
 | |
| 
 | |
|   base::mHasFailed = false;
 | |
|   mRollback = rollback;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TTokenizer<TChar>::SkipUntil(typename base::Token const& aToken) {
 | |
|   typename base::TAString::const_char_iterator rollback = base::mCursor;
 | |
|   const typename base::Token eof = base::Token::EndOfFile();
 | |
| 
 | |
|   typename base::Token t;
 | |
|   while (Next(t)) {
 | |
|     if (aToken.Equals(t) || eof.Equals(t)) {
 | |
|       Rollback();
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   mRollback = rollback;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::CheckChar(bool (*aClassifier)(const TChar aChar)) {
 | |
|   if (!aClassifier) {
 | |
|     MOZ_ASSERT(false);
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   if (!base::HasInput() || base::mCursor == base::mEnd) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   if (!aClassifier(*base::mCursor)) {
 | |
|     base::mHasFailed = true;
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   mRollback = base::mCursor;
 | |
|   ++base::mCursor;
 | |
|   base::mHasFailed = false;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::CheckPhrase(const typename base::TAString& aPhrase) {
 | |
|   if (!base::HasInput()) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   typedef typename base::TAString::const_char_iterator Cursor;
 | |
| 
 | |
|   TTokenizer<TChar> pattern(aPhrase);
 | |
|   MOZ_ASSERT(!pattern.CheckEOF(),
 | |
|              "This will return true but won't shift the Tokenizer's cursor");
 | |
| 
 | |
|   return [&](Cursor cursor, Cursor rollback) mutable {
 | |
|     while (true) {
 | |
|       if (pattern.CheckEOF()) {
 | |
|         base::mHasFailed = false;
 | |
|         mRollback = cursor;
 | |
|         return true;
 | |
|       }
 | |
| 
 | |
|       typename base::Token t1, t2;
 | |
|       Unused << Next(t1);
 | |
|       Unused << pattern.Next(t2);
 | |
|       if (t1.Type() == t2.Type() && t1.Fragment().Equals(t2.Fragment())) {
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     base::mHasFailed = true;
 | |
|     base::mPastEof = false;
 | |
|     base::mCursor = cursor;
 | |
|     mRollback = rollback;
 | |
|     return false;
 | |
|   }(base::mCursor, mRollback);
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::ReadChar(TChar* aValue) {
 | |
|   MOZ_RELEASE_ASSERT(aValue);
 | |
| 
 | |
|   typename base::Token t;
 | |
|   if (!Check(base::TOKEN_CHAR, t)) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   *aValue = t.AsChar();
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::ReadChar(bool (*aClassifier)(const TChar aChar),
 | |
|                                  TChar* aValue) {
 | |
|   MOZ_RELEASE_ASSERT(aValue);
 | |
| 
 | |
|   if (!CheckChar(aClassifier)) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   *aValue = *mRollback;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::ReadWord(typename base::TAString& aValue) {
 | |
|   typename base::Token t;
 | |
|   if (!Check(base::TOKEN_WORD, t)) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   aValue.Assign(t.AsString());
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::ReadWord(typename base::TDependentSubstring& aValue) {
 | |
|   typename base::Token t;
 | |
|   if (!Check(base::TOKEN_WORD, t)) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   aValue.Rebind(t.AsString().BeginReading(), t.AsString().Length());
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
 | |
|                                   typename base::TAString& aResult,
 | |
|                                   ClaimInclusion aInclude) {
 | |
|   typename base::TDependentSubstring substring;
 | |
|   bool rv = ReadUntil(aToken, substring, aInclude);
 | |
|   aResult.Assign(substring);
 | |
|   return rv;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
 | |
|                                   typename base::TDependentSubstring& aResult,
 | |
|                                   ClaimInclusion aInclude) {
 | |
|   typename base::TAString::const_char_iterator record = mRecord;
 | |
|   Record();
 | |
|   typename base::TAString::const_char_iterator rollback = mRollback =
 | |
|       base::mCursor;
 | |
| 
 | |
|   bool found = false;
 | |
|   typename base::Token t;
 | |
|   while (Next(t)) {
 | |
|     if (aToken.Equals(t)) {
 | |
|       found = true;
 | |
|       break;
 | |
|     }
 | |
|     if (t.Equals(base::Token::EndOfFile())) {
 | |
|       // We don't want to eat it.
 | |
|       Rollback();
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   Claim(aResult, aInclude);
 | |
|   mRollback = rollback;
 | |
|   mRecord = record;
 | |
|   return found;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TTokenizer<TChar>::Rollback() {
 | |
|   MOZ_ASSERT(base::mCursor > mRollback || base::mPastEof, "TODO!!!");
 | |
| 
 | |
|   base::mPastEof = false;
 | |
|   base::mHasFailed = false;
 | |
|   base::mCursor = mRollback;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TTokenizer<TChar>::Record(ClaimInclusion aInclude) {
 | |
|   mRecord = aInclude == INCLUDE_LAST ? mRollback : base::mCursor;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TTokenizer<TChar>::Claim(typename base::TAString& aResult,
 | |
|                               ClaimInclusion aInclusion) {
 | |
|   typename base::TAString::const_char_iterator close =
 | |
|       aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
 | |
|   aResult.Assign(Substring(mRecord, close));
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TTokenizer<TChar>::Claim(typename base::TDependentSubstring& aResult,
 | |
|                               ClaimInclusion aInclusion) {
 | |
|   typename base::TAString::const_char_iterator close =
 | |
|       aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
 | |
| 
 | |
|   MOZ_RELEASE_ASSERT(close >= mRecord, "Overflow!");
 | |
|   aResult.Rebind(mRecord, close - mRecord);
 | |
| }
 | |
| 
 | |
| // TokenizerBase
 | |
| 
 | |
| template <typename TChar>
 | |
| TokenizerBase<TChar>::TokenizerBase(const TChar* aWhitespaces,
 | |
|                                     const TChar* aAdditionalWordChars)
 | |
|     : mPastEof(false),
 | |
|       mHasFailed(false),
 | |
|       mInputFinished(true),
 | |
|       mMode(Mode::FULL),
 | |
|       mMinRawDelivery(1024),
 | |
|       mWhitespaces(aWhitespaces ? aWhitespaces : sWhitespaces),
 | |
|       mAdditionalWordChars(aAdditionalWordChars),
 | |
|       mCursor(nullptr),
 | |
|       mEnd(nullptr),
 | |
|       mNextCustomTokenID(TOKEN_CUSTOM0) {}
 | |
| 
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::AddCustomToken(const TAString& aValue,
 | |
|                                           ECaseSensitivity aCaseInsensitivity,
 | |
|                                           bool aEnabled) -> Token {
 | |
|   MOZ_ASSERT(!aValue.IsEmpty());
 | |
| 
 | |
|   UniquePtr<Token>& t = *mCustomTokens.AppendElement();
 | |
|   t = MakeUnique<Token>();
 | |
| 
 | |
|   t->mType = static_cast<TokenType>(++mNextCustomTokenID);
 | |
|   t->mCustomCaseInsensitivity = aCaseInsensitivity;
 | |
|   t->mCustomEnabled = aEnabled;
 | |
|   t->mCustom.Assign(aValue);
 | |
|   return *t;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TokenizerBase<TChar>::RemoveCustomToken(Token& aToken) {
 | |
|   if (aToken.mType == TOKEN_UNKNOWN) {
 | |
|     // Already removed
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   for (UniquePtr<Token> const& custom : mCustomTokens) {
 | |
|     if (custom->mType == aToken.mType) {
 | |
|       mCustomTokens.RemoveElement(custom);
 | |
|       aToken.mType = TOKEN_UNKNOWN;
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   MOZ_ASSERT(false, "Token to remove not found");
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TokenizerBase<TChar>::EnableCustomToken(Token const& aToken,
 | |
|                                              bool aEnabled) {
 | |
|   if (aToken.mType == TOKEN_UNKNOWN) {
 | |
|     // Already removed
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   for (UniquePtr<Token> const& custom : mCustomTokens) {
 | |
|     if (custom->Type() == aToken.Type()) {
 | |
|       // This effectively destroys the token instance.
 | |
|       custom->mCustomEnabled = aEnabled;
 | |
|       return;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   MOZ_ASSERT(false, "Token to change not found");
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TokenizerBase<TChar>::SetTokenizingMode(Mode aMode) {
 | |
|   mMode = aMode;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::HasFailed() const {
 | |
|   return mHasFailed;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::HasInput() const {
 | |
|   return !mPastEof;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Parse(Token& aToken) const ->
 | |
|     typename TAString::const_char_iterator {
 | |
|   if (mCursor == mEnd) {
 | |
|     if (!mInputFinished) {
 | |
|       return mCursor;
 | |
|     }
 | |
| 
 | |
|     aToken = Token::EndOfFile();
 | |
|     return mEnd;
 | |
|   }
 | |
| 
 | |
|   MOZ_RELEASE_ASSERT(mEnd >= mCursor, "Overflow!");
 | |
|   typename TAString::size_type available = mEnd - mCursor;
 | |
| 
 | |
|   uint32_t longestCustom = 0;
 | |
|   for (UniquePtr<Token> const& custom : mCustomTokens) {
 | |
|     if (IsCustom(mCursor, *custom, &longestCustom)) {
 | |
|       aToken = *custom;
 | |
|       return mCursor + custom->mCustom.Length();
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (!mInputFinished && available < longestCustom) {
 | |
|     // Not enough data to deterministically decide.
 | |
|     return mCursor;
 | |
|   }
 | |
| 
 | |
|   typename TAString::const_char_iterator next = mCursor;
 | |
| 
 | |
|   if (mMode == Mode::CUSTOM_ONLY) {
 | |
|     // We have to do a brute-force search for all of the enabled custom
 | |
|     // tokens.
 | |
|     while (next < mEnd) {
 | |
|       ++next;
 | |
|       for (UniquePtr<Token> const& custom : mCustomTokens) {
 | |
|         if (IsCustom(next, *custom)) {
 | |
|           aToken = Token::Raw();
 | |
|           return next;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if (mInputFinished) {
 | |
|       // End of the data reached.
 | |
|       aToken = Token::Raw();
 | |
|       return next;
 | |
|     }
 | |
| 
 | |
|     if (longestCustom < available && available > mMinRawDelivery) {
 | |
|       // We can return some data w/o waiting for either a custom token
 | |
|       // or call to FinishData() when we leave the tail where all the
 | |
|       // custom tokens potentially fit, so we can't lose only partially
 | |
|       // delivered tokens.  This preserves reasonable granularity.
 | |
|       aToken = Token::Raw();
 | |
|       return mEnd - longestCustom + 1;
 | |
|     }
 | |
| 
 | |
|     // Not enough data to deterministically decide.
 | |
|     return mCursor;
 | |
|   }
 | |
| 
 | |
|   enum State {
 | |
|     PARSE_INTEGER,
 | |
|     PARSE_WORD,
 | |
|     PARSE_CRLF,
 | |
|     PARSE_LF,
 | |
|     PARSE_WS,
 | |
|     PARSE_CHAR,
 | |
|   } state;
 | |
| 
 | |
|   if (IsWordFirst(*next)) {
 | |
|     state = PARSE_WORD;
 | |
|   } else if (IsNumber(*next)) {
 | |
|     state = PARSE_INTEGER;
 | |
|   } else if (contains(mWhitespaces, *next)) {  // not UTF-8 friendly?
 | |
|     state = PARSE_WS;
 | |
|   } else if (*next == '\r') {
 | |
|     state = PARSE_CRLF;
 | |
|   } else if (*next == '\n') {
 | |
|     state = PARSE_LF;
 | |
|   } else {
 | |
|     state = PARSE_CHAR;
 | |
|   }
 | |
| 
 | |
|   mozilla::CheckedUint64 resultingNumber = 0;
 | |
| 
 | |
|   while (next < mEnd) {
 | |
|     switch (state) {
 | |
|       case PARSE_INTEGER:
 | |
|         // Keep it simple for now
 | |
|         resultingNumber *= 10;
 | |
|         resultingNumber += static_cast<uint64_t>(*next - '0');
 | |
| 
 | |
|         ++next;
 | |
|         if (IsPending(next)) {
 | |
|           break;
 | |
|         }
 | |
|         if (IsEnd(next) || !IsNumber(*next)) {
 | |
|           if (!resultingNumber.isValid()) {
 | |
|             aToken = Token::Error();
 | |
|           } else {
 | |
|             aToken = Token::Number(resultingNumber.value());
 | |
|           }
 | |
|           return next;
 | |
|         }
 | |
|         break;
 | |
| 
 | |
|       case PARSE_WORD:
 | |
|         ++next;
 | |
|         if (IsPending(next)) {
 | |
|           break;
 | |
|         }
 | |
|         if (IsEnd(next) || !IsWord(*next)) {
 | |
|           aToken = Token::Word(Substring(mCursor, next));
 | |
|           return next;
 | |
|         }
 | |
|         break;
 | |
| 
 | |
|       case PARSE_CRLF:
 | |
|         ++next;
 | |
|         if (IsPending(next)) {
 | |
|           break;
 | |
|         }
 | |
|         if (!IsEnd(next) && *next == '\n') {  // LF is optional
 | |
|           ++next;
 | |
|         }
 | |
|         aToken = Token::NewLine();
 | |
|         return next;
 | |
| 
 | |
|       case PARSE_LF:
 | |
|         ++next;
 | |
|         aToken = Token::NewLine();
 | |
|         return next;
 | |
| 
 | |
|       case PARSE_WS:
 | |
|         ++next;
 | |
|         aToken = Token::Whitespace();
 | |
|         return next;
 | |
| 
 | |
|       case PARSE_CHAR:
 | |
|         ++next;
 | |
|         aToken = Token::Char(*mCursor);
 | |
|         return next;
 | |
|     }  // switch (state)
 | |
|   }    // while (next < end)
 | |
| 
 | |
|   MOZ_ASSERT(!mInputFinished);
 | |
|   return mCursor;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::IsEnd(
 | |
|     const typename TAString::const_char_iterator& caret) const {
 | |
|   return caret == mEnd;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::IsPending(
 | |
|     const typename TAString::const_char_iterator& caret) const {
 | |
|   return IsEnd(caret) && !mInputFinished;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::IsWordFirst(const TChar aInput) const {
 | |
|   // TODO: make this fully work with unicode
 | |
|   return (ToLowerCase(static_cast<uint32_t>(aInput)) !=
 | |
|           ToUpperCase(static_cast<uint32_t>(aInput))) ||
 | |
|          '_' == aInput ||
 | |
|          (mAdditionalWordChars ? contains(mAdditionalWordChars, aInput)
 | |
|                                : false);
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::IsWord(const TChar aInput) const {
 | |
|   return IsWordFirst(aInput) || IsNumber(aInput);
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::IsNumber(const TChar aInput) const {
 | |
|   // TODO: are there unicode numbers?
 | |
|   return aInput >= '0' && aInput <= '9';
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::IsCustom(
 | |
|     const typename TAString::const_char_iterator& caret,
 | |
|     const Token& aCustomToken, uint32_t* aLongest) const {
 | |
|   MOZ_ASSERT(aCustomToken.mType > TOKEN_CUSTOM0);
 | |
|   if (!aCustomToken.mCustomEnabled) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   if (aLongest) {
 | |
|     *aLongest = std::max<uint32_t>(*aLongest, aCustomToken.mCustom.Length());
 | |
|   }
 | |
| 
 | |
|   // This is not very likely to happen according to how we call this method
 | |
|   // and since it's on a hot path, it's just a diagnostic assert,
 | |
|   // not a release assert.
 | |
|   MOZ_DIAGNOSTIC_ASSERT(mEnd >= caret, "Overflow?");
 | |
|   uint32_t inputLength = mEnd - caret;
 | |
|   if (aCustomToken.mCustom.Length() > inputLength) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   TDependentSubstring inputFragment(caret, aCustomToken.mCustom.Length());
 | |
|   if (aCustomToken.mCustomCaseInsensitivity == CASE_INSENSITIVE) {
 | |
|     if constexpr (std::is_same_v<TChar, char>) {
 | |
|       return inputFragment.Equals(aCustomToken.mCustom,
 | |
|                                   nsCaseInsensitiveUTF8StringComparator);
 | |
|     } else {
 | |
|       return inputFragment.Equals(aCustomToken.mCustom,
 | |
|                                   nsCaseInsensitiveStringComparator);
 | |
|     }
 | |
|   }
 | |
|   return inputFragment.Equals(aCustomToken.mCustom);
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TokenizerBase<TChar>::AssignFragment(
 | |
|     Token& aToken, typename TAString::const_char_iterator begin,
 | |
|     typename TAString::const_char_iterator end) {
 | |
|   aToken.AssignFragment(begin, end);
 | |
| }
 | |
| 
 | |
| #ifdef DEBUG
 | |
| 
 | |
| template <typename TChar>
 | |
| void TokenizerBase<TChar>::Validate(Token const& aToken) {
 | |
|   if (aToken.Type() == TOKEN_WORD) {
 | |
|     typename TAString::const_char_iterator c = aToken.AsString().BeginReading();
 | |
|     typename TAString::const_char_iterator e = aToken.AsString().EndReading();
 | |
| 
 | |
|     if (c < e) {
 | |
|       MOZ_ASSERT(IsWordFirst(*c));
 | |
|       while (++c < e) {
 | |
|         MOZ_ASSERT(IsWord(*c));
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| // TokenizerBase::Token
 | |
| 
 | |
| template <typename TChar>
 | |
| TokenizerBase<TChar>::Token::Token()
 | |
|     : mType(TOKEN_UNKNOWN),
 | |
|       mChar(0),
 | |
|       mInteger(0),
 | |
|       mCustomCaseInsensitivity(CASE_SENSITIVE),
 | |
|       mCustomEnabled(false) {}
 | |
| 
 | |
| template <typename TChar>
 | |
| TokenizerBase<TChar>::Token::Token(const Token& aOther)
 | |
|     : mType(aOther.mType),
 | |
|       mCustom(aOther.mCustom),
 | |
|       mChar(aOther.mChar),
 | |
|       mInteger(aOther.mInteger),
 | |
|       mCustomCaseInsensitivity(aOther.mCustomCaseInsensitivity),
 | |
|       mCustomEnabled(aOther.mCustomEnabled) {
 | |
|   if (mType == TOKEN_WORD || mType > TOKEN_CUSTOM0) {
 | |
|     mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::operator=(const Token& aOther) -> Token& {
 | |
|   mType = aOther.mType;
 | |
|   mCustom = aOther.mCustom;
 | |
|   mChar = aOther.mChar;
 | |
|   mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
 | |
|   mInteger = aOther.mInteger;
 | |
|   mCustomCaseInsensitivity = aOther.mCustomCaseInsensitivity;
 | |
|   mCustomEnabled = aOther.mCustomEnabled;
 | |
|   return *this;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| void TokenizerBase<TChar>::Token::AssignFragment(
 | |
|     typename TAString::const_char_iterator begin,
 | |
|     typename TAString::const_char_iterator end) {
 | |
|   MOZ_RELEASE_ASSERT(end >= begin, "Overflow!");
 | |
|   mFragment.Rebind(begin, end - begin);
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::Raw() -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_RAW;
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::Word(TAString const& aValue) -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_WORD;
 | |
|   t.mWord.Rebind(aValue.BeginReading(), aValue.Length());
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::Char(TChar const aValue) -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_CHAR;
 | |
|   t.mChar = aValue;
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::Number(uint64_t const aValue) -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_INTEGER;
 | |
|   t.mInteger = aValue;
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::Whitespace() -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_WS;
 | |
|   t.mChar = '\0';
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::NewLine() -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_EOL;
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::EndOfFile() -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_EOF;
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| // static
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::Error() -> Token {
 | |
|   Token t;
 | |
|   t.mType = TOKEN_ERROR;
 | |
|   return t;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| bool TokenizerBase<TChar>::Token::Equals(const Token& aOther) const {
 | |
|   if (mType != aOther.mType) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   switch (mType) {
 | |
|     case TOKEN_INTEGER:
 | |
|       return AsInteger() == aOther.AsInteger();
 | |
|     case TOKEN_WORD:
 | |
|       return AsString() == aOther.AsString();
 | |
|     case TOKEN_CHAR:
 | |
|       return AsChar() == aOther.AsChar();
 | |
|     default:
 | |
|       return true;
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| TChar TokenizerBase<TChar>::Token::AsChar() const {
 | |
|   MOZ_ASSERT(mType == TOKEN_CHAR || mType == TOKEN_WS);
 | |
|   return mChar;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| auto TokenizerBase<TChar>::Token::AsString() const -> TDependentSubstring {
 | |
|   MOZ_ASSERT(mType == TOKEN_WORD);
 | |
|   return mWord;
 | |
| }
 | |
| 
 | |
| template <typename TChar>
 | |
| uint64_t TokenizerBase<TChar>::Token::AsInteger() const {
 | |
|   MOZ_ASSERT(mType == TOKEN_INTEGER);
 | |
|   return mInteger;
 | |
| }
 | |
| 
 | |
| template class TokenizerBase<char>;
 | |
| template class TokenizerBase<char16_t>;
 | |
| 
 | |
| template class TTokenizer<char>;
 | |
| template class TTokenizer<char16_t>;
 | |
| 
 | |
| }  // namespace mozilla
 | 
