Bug 1679987 - Add nsTokenizedRange adapter to enable range-based for with tokenizers. r=xpcom-reviewers,nika

Differential Revision: https://phabricator.services.mozilla.com/D98307
2020-12-16 19:10:13 +00:00 · 2020-12-16 19:10:13 +00:00 · ae63265100
commit ae63265100
parent fe1c53bd2d
2 changed files with 131 additions and 3 deletions
--- a/xpcom/ds/nsCharSeparatedTokenizer.h
+++ b/xpcom/ds/nsCharSeparatedTokenizer.h
@ -7,6 +7,7 @@
 #ifndef __nsCharSeparatedTokenizer_h
 #define __nsCharSeparatedTokenizer_h

+#include "mozilla/Maybe.h"
 #include "mozilla/RangedPtr.h"

 #include "nsDependentSubstring.h"
@ -30,16 +31,18 @@
 * The function used for whitespace detection is a template argument.
 * By default, it is NS_IsAsciiWhitespace.
 */
-template <typename DependentSubstringType, bool IsWhitespace(char16_t)>
+template <typename TDependentSubstringType, bool IsWhitespace(char16_t)>
 class nsTCharSeparatedTokenizer {
-  typedef typename DependentSubstringType::char_type CharType;
-  typedef typename DependentSubstringType::substring_type SubstringType;
+  using CharType = typename TDependentSubstringType::char_type;
+  using SubstringType = typename TDependentSubstringType::substring_type;

 public:
  // Flags -- only one for now. If we need more, they should be defined to
  // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
  enum { SEPARATOR_OPTIONAL = 1 };

+  using DependentSubstringType = TDependentSubstringType;
+
  nsTCharSeparatedTokenizer(const SubstringType& aSource,
                            CharType aSeparatorChar, uint32_t aFlags = 0)
      : mIter(aSource.Data(), aSource.Length()),
@ -143,6 +146,8 @@ class nsTCharSeparatedTokenizer {
    return Substring(tokenStart.get(), tokenEnd.get());
  }

+  auto ToRange() const;
+
 private:
  mozilla::RangedPtr<const CharType> mIter;
  const mozilla::RangedPtr<const CharType> mEnd;
@ -173,4 +178,66 @@ using nsCCharSeparatedTokenizerTemplate =
 using nsCCharSeparatedTokenizer =
    nsCCharSeparatedTokenizerTemplate<NS_IsAsciiWhitespace>;

+/**
+ * Range adapter that lets the tokens produced by a char separated tokenizer be
+ * visited with a range-based for loop.
+ *
+ * It is normally not named directly but obtained through the tokenizer's
+ * ToRange() method, e.g.
+ *
+ * for (const auto& token : nsCharSeparatedTokenizer(aText, ' ').ToRange()) {
+ *    // ...
+ * }
+ *
+ * The range keeps its own tokenizer, and begin() gives every iterator a copy
+ * of it, so advancing an iterator never changes the range itself.
+ *
+ * NOTE(review): nsTCharSeparatedTokenizer is built from aSource.Data() and
+ * aSource.Length() rather than from a copy of the string, so presumably the
+ * tokenized string has to outlive the range and its iterators -- confirm.
+ */
+template <typename Tokenizer>
+class nsTokenizedRange {
+ public:
+  using DependentSubstringType = typename Tokenizer::DependentSubstringType;
+
+  // Tag type returned by end(). An Iterator compares equal to it once it no
+  // longer holds a token.
+  struct EndSentinel {};
+
+  // Iterator that owns a tokenizer copy and caches the token most recently
+  // read from it.
+  class Iterator {
+   public:
+    // Copies aTokenizer and immediately tries to read the first token.
+    explicit Iterator(const Tokenizer& aTokenizer) : mTokenizer(aTokenizer) {
+      Advance();
+    }
+
+    // Returns the cached token. Only call this while the iterator does not
+    // compare equal to the end sentinel; mCurrentToken is empty otherwise.
+    const DependentSubstringType& operator*() const { return *mCurrentToken; }
+
+    // Pre-increment: moves on to the next token, if there is one.
+    Iterator& operator++() {
+      Advance();
+      return *this;
+    }
+
+    // The end is reached exactly when no token is cached.
+    bool operator==(const EndSentinel&) const {
+      return mCurrentToken.isNothing();
+    }
+
+    bool operator!=(const EndSentinel& aEnd) const { return !(*this == aEnd); }
+
+   private:
+    // Discards the cached token, then caches the tokenizer's next token if it
+    // reports having one. Otherwise the cache stays empty.
+    void Advance() {
+      mCurrentToken.reset();
+      if (!mTokenizer.hasMoreTokens()) {
+        return;
+      }
+      mCurrentToken.emplace(mTokenizer.nextToken());
+    }
+
+    Tokenizer mTokenizer;
+    mozilla::Maybe<DependentSubstringType> mCurrentToken;
+  };
+
+  // Takes over aTokenizer; only rvalues are accepted.
+  explicit nsTokenizedRange(Tokenizer&& aTokenizer)
+      : mTokenizer(std::move(aTokenizer)) {}
+
+  // Each call returns a fresh iterator working on a copy of the stored
+  // tokenizer.
+  Iterator begin() const { return Iterator(mTokenizer); }
+  EndSentinel end() const { return EndSentinel(); }
+
+ private:
+  const Tokenizer mTokenizer;
+};
+
+// Returns an nsTokenizedRange over the tokens of this tokenizer, for use in a
+// range-based for loop. The range is given a copy of *this, so the tokenizer
+// the method is called on is not advanced.
+template <typename TDependentSubstringType, bool IsWhitespace(char16_t)>
+auto nsTCharSeparatedTokenizer<TDependentSubstringType, IsWhitespace>::ToRange()
+    const {
+  // Inside the member definition the class name denotes this specialization.
+  using Self = nsTCharSeparatedTokenizer;
+  return nsTokenizedRange<Self>{Self{*this}};
+}
+
 #endif /* __nsCharSeparatedTokenizer_h */
--- a/xpcom/tests/gtest/TestStrings.cpp
+++ b/xpcom/tests/gtest/TestStrings.cpp
@ -7,6 +7,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "nsASCIIMask.h"
+#include "nsCharSeparatedTokenizer.h"
 #include "nsPrintfCString.h"
 #include "nsString.h"
 #include "nsStringBuffer.h"
@ -2037,6 +2038,66 @@ TEST_F(Strings, ConvertToSpan) {
  }
 }

+TEST_F(Strings, TokenizedRangeEmpty) {
+  // Tokenizing an empty string is expected to give a range without tokens, so
+  // every token that reaches the loop body is recorded as a failure.
+  const auto expectNoTokens = [](const auto& aRange) {
+    for (const auto& token : aRange) {
+      static_cast<void>(token);
+      ADD_FAILURE();
+    }
+  };
+
+  // 8-bit strings
+  expectNoTokens(nsCCharSeparatedTokenizer(""_ns, ',').ToRange());
+
+  // 16-bit strings
+  expectNoTokens(nsCharSeparatedTokenizer(u""_ns, ',').ToRange());
+}
+
+TEST_F(Strings, TokenizedRangeWhitespaceOnly) {
+  // A string made up of a single space and no separator is expected to give a
+  // range without tokens, so every token that reaches the loop body is
+  // recorded as a failure.
+  const auto expectNoTokens = [](const auto& aRange) {
+    for (const auto& token : aRange) {
+      static_cast<void>(token);
+      ADD_FAILURE();
+    }
+  };
+
+  // 8-bit strings
+  expectNoTokens(nsCCharSeparatedTokenizer(" "_ns, ',').ToRange());
+
+  // 16-bit strings
+  expectNoTokens(nsCharSeparatedTokenizer(u" "_ns, ',').ToRange());
+}
+
+TEST_F(Strings, TokenizedRangeNonEmpty) {
+  // Splitting "foo,bar" at ',' is expected to visit "foo" and then "bar". The
+  // tokens are copied into an array in visiting order and compared as a whole.
+
+  // 8-bit strings
+  {
+    const nsTArray<nsCString> expected{"foo"_ns, "bar"_ns};
+    const auto range = nsCCharSeparatedTokenizer("foo,bar"_ns, ',').ToRange();
+
+    nsTArray<nsCString> collected;
+    for (const auto& token : range) {
+      collected.EmplaceBack(token);
+    }
+
+    EXPECT_EQ(collected, expected);
+  }
+
+  // 16-bit strings
+  {
+    const nsTArray<nsString> expected{u"foo"_ns, u"bar"_ns};
+    const auto range = nsCharSeparatedTokenizer(u"foo,bar"_ns, ',').ToRange();
+
+    nsTArray<nsString> collected;
+    for (const auto& token : range) {
+      collected.EmplaceBack(token);
+    }
+
+    EXPECT_EQ(collected, expected);
+  }
+}
+
 // Macros for reducing verbosity of printf tests.
 #define create_printf_strings(format, ...)                 \
  nsCString appendPrintfString;                            \