forked from mirrors/gecko-dev
		
	 14ca007916
			
		
	
	
		14ca007916
		
	
	
	
	
		
			
			Also adds missing includes in some files, these were previously only transivitely included through mozilla/TypeTraits.h. Differential Revision: https://phabricator.services.mozilla.com/D68561 --HG-- extra : moz-landing-system : lando
		
			
				
	
	
		
			288 lines
		
	
	
	
		
			8.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			288 lines
		
	
	
	
		
			8.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | |
| /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| /* Character/text operations. */
 | |
| 
 | |
| #ifndef mozilla_TextUtils_h
 | |
| #define mozilla_TextUtils_h
 | |
| 
 | |
| #include "mozilla/Assertions.h"
 | |
| #include "mozilla/Latin1.h"
 | |
| 
 | |
| #ifdef MOZ_HAS_JSRUST
 | |
| // Can't include mozilla/Encoding.h here.
 | |
| extern "C" {
 | |
| // Declared as uint8_t instead of char to match declaration in another header.
 | |
| size_t encoding_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| namespace mozilla {
 | |
| 
 | |
| // See Utf8.h for IsUtf8() and conversions between UTF-8 and UTF-16.
 | |
| // See Latin1.h for testing UTF-16 and UTF-8 for Latin1ness and
 | |
| // for conversions to and from Latin1.
 | |
| 
 | |
| // The overloads below are not templated in order to make
 | |
| // implicit conversions to span work as expected for the Span
 | |
| // overloads.
 | |
| 
 | |
| /** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
 | |
| constexpr bool IsAscii(unsigned char aChar) { return aChar < 0x80; }
 | |
| 
 | |
| /** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
 | |
| constexpr bool IsAscii(signed char aChar) {
 | |
|   return IsAscii(static_cast<unsigned char>(aChar));
 | |
| }
 | |
| 
 | |
| /** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
 | |
| constexpr bool IsAscii(char aChar) {
 | |
|   return IsAscii(static_cast<unsigned char>(aChar));
 | |
| }
 | |
| 
 | |
| /** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
 | |
| constexpr bool IsAscii(char16_t aChar) { return aChar < 0x80; }
 | |
| 
 | |
| /** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
 | |
| constexpr bool IsAscii(char32_t aChar) { return aChar < 0x80; }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| iff |aString| contains only ASCII characters, that is,
 | |
|  * characters in the range [0x00, 0x80).
 | |
|  *
 | |
|  * @param aString a 8-bit wide string to scan
 | |
|  */
 | |
| inline bool IsAscii(mozilla::Span<const char> aString) {
 | |
| #if MOZ_HAS_JSRUST()
 | |
|   size_t length = aString.Length();
 | |
|   const char* ptr = aString.Elements();
 | |
|   // For short strings, avoid the function call, since, the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < mozilla::detail::kShortStringLimitForInlinePaths) {
 | |
|     const uint8_t* uptr = reinterpret_cast<const uint8_t*>(ptr);
 | |
|     uint8_t accu = 0;
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       accu |= uptr[i];
 | |
|     }
 | |
|     return accu < 0x80;
 | |
|   }
 | |
|   return encoding_mem_is_ascii(ptr, length);
 | |
| #else
 | |
|   for (char c : aString) {
 | |
|     if (!IsAscii(c)) {
 | |
|       return false;
 | |
|     }
 | |
|   }
 | |
|   return true;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| iff |aString| contains only ASCII characters, that is,
 | |
|  * characters in the range [0x00, 0x80).
 | |
|  *
 | |
|  * @param aString a 16-bit wide string to scan
 | |
|  */
 | |
| inline bool IsAscii(mozilla::Span<const char16_t> aString) {
 | |
| #if MOZ_HAS_JSRUST()
 | |
|   size_t length = aString.Length();
 | |
|   const char16_t* ptr = aString.Elements();
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   // 16 is a bit larger than logically necessary for this function alone,
 | |
|   // but it's important that the limit here matches the limit used in
 | |
|   // LossyConvertUtf16toLatin1!
 | |
|   if (length < mozilla::detail::kShortStringLimitForInlinePaths) {
 | |
|     char16_t accu = 0;
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       accu |= ptr[i];
 | |
|     }
 | |
|     return accu < 0x80;
 | |
|   }
 | |
|   return encoding_mem_is_basic_latin(ptr, length);
 | |
| #else
 | |
|   for (char16_t c : aString) {
 | |
|     if (!IsAscii(c)) {
 | |
|       return false;
 | |
|     }
 | |
|   }
 | |
|   return true;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff every character in the null-terminated string pointed to by
 | |
|  * |aChar| is ASCII, i.e. in the range [0, 0x80).
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiNullTerminated(const Char* aChar) {
 | |
|   while (Char c = *aChar++) {
 | |
|     if (!IsAscii(c)) {
 | |
|       return false;
 | |
|     }
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| #if MOZ_HAS_JSRUST()
 | |
| /**
 | |
|  * Returns the index of the first non-ASCII byte or
 | |
|  * the length of the string if there are none.
 | |
|  */
 | |
| inline size_t AsciiValidUpTo(mozilla::Span<const char> aString) {
 | |
|   return encoding_ascii_valid_up_to(
 | |
|       reinterpret_cast<const uint8_t*>(aString.Elements()), aString.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns the index of the first unpaired surrogate or
 | |
|  * the length of the string if there are none.
 | |
|  */
 | |
| inline size_t Utf16ValidUpTo(mozilla::Span<const char16_t> aString) {
 | |
|   return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Replaces unpaired surrogates with U+FFFD in the argument.
 | |
|  *
 | |
|  * Note: If you have an nsAString, use EnsureUTF16Validity() from
 | |
|  * nsReadableUtils.h instead to avoid unsharing a valid shared
 | |
|  * string.
 | |
|  */
 | |
| inline void EnsureUtf16ValiditySpan(mozilla::Span<char16_t> aString) {
 | |
|   encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Convert ASCII to UTF-16. In debug builds, assert that the input is
 | |
|  * ASCII.
 | |
|  *
 | |
|  * The length of aDest must not be less than the length of aSource.
 | |
|  */
 | |
| inline void ConvertAsciitoUtf16(mozilla::Span<const char> aSource,
 | |
|                                 mozilla::Span<char16_t> aDest) {
 | |
|   MOZ_ASSERT(IsAscii(aSource));
 | |
|   ConvertLatin1toUtf16(aSource, aDest);
 | |
| }
 | |
| 
 | |
| #endif  // MOZ_HAS_JSRUST
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches Ascii Whitespace.
 | |
|  *
 | |
|  * This function is intended to match the Infra standard
 | |
|  * (https://infra.spec.whatwg.org/#ascii-whitespace)
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiWhitespace(Char aChar) {
 | |
|   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
 | |
|   auto uc = static_cast<UnsignedChar>(aChar);
 | |
|   return uc == 0x9 || uc == 0xA || uc == 0xC || uc == 0xD || uc == 0x20;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches [a-z].
 | |
|  *
 | |
|  * This function is basically what you thought islower was, except its behavior
 | |
|  * doesn't depend on the user's current locale.
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiLowercaseAlpha(Char aChar) {
 | |
|   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
 | |
|   auto uc = static_cast<UnsignedChar>(aChar);
 | |
|   return 'a' <= uc && uc <= 'z';
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches [A-Z].
 | |
|  *
 | |
|  * This function is basically what you thought isupper was, except its behavior
 | |
|  * doesn't depend on the user's current locale.
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiUppercaseAlpha(Char aChar) {
 | |
|   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
 | |
|   auto uc = static_cast<UnsignedChar>(aChar);
 | |
|   return 'A' <= uc && uc <= 'Z';
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches [a-zA-Z].
 | |
|  *
 | |
|  * This function is basically what you thought isalpha was, except its behavior
 | |
|  * doesn't depend on the user's current locale.
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiAlpha(Char aChar) {
 | |
|   return IsAsciiLowercaseAlpha(aChar) || IsAsciiUppercaseAlpha(aChar);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches [0-9].
 | |
|  *
 | |
|  * This function is basically what you thought isdigit was, except its behavior
 | |
|  * doesn't depend on the user's current locale.
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiDigit(Char aChar) {
 | |
|   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
 | |
|   auto uc = static_cast<UnsignedChar>(aChar);
 | |
|   return '0' <= uc && uc <= '9';
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches [0-9a-fA-F].
 | |
|  *
 | |
|  * This function is basically isxdigit, but guaranteed to be only for ASCII.
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiHexDigit(Char aChar) {
 | |
|   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
 | |
|   auto uc = static_cast<UnsignedChar>(aChar);
 | |
|   return ('0' <= uc && uc <= '9') || ('a' <= uc && uc <= 'f') ||
 | |
|          ('A' <= uc && uc <= 'F');
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns true iff |aChar| matches [a-zA-Z0-9].
 | |
|  *
 | |
|  * This function is basically what you thought isalnum was, except its behavior
 | |
|  * doesn't depend on the user's current locale.
 | |
|  */
 | |
| template <typename Char>
 | |
| constexpr bool IsAsciiAlphanumeric(Char aChar) {
 | |
|   return IsAsciiDigit(aChar) || IsAsciiAlpha(aChar);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Converts an ASCII alphanumeric digit [0-9a-zA-Z] to number as if in base-36.
 | |
|  * (This function therefore works for decimal, hexadecimal, etc.).
 | |
|  */
 | |
| template <typename Char>
 | |
| uint8_t AsciiAlphanumericToNumber(Char aChar) {
 | |
|   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
 | |
|   auto uc = static_cast<UnsignedChar>(aChar);
 | |
| 
 | |
|   if ('0' <= uc && uc <= '9') {
 | |
|     return uc - '0';
 | |
|   }
 | |
| 
 | |
|   if ('A' <= uc && uc <= 'Z') {
 | |
|     return uc - 'A' + 10;
 | |
|   }
 | |
| 
 | |
|   // Ideally this function would be constexpr, but unfortunately gcc at least as
 | |
|   // of 6.4 forbids non-constexpr function calls in unevaluated constexpr
 | |
|   // function calls.  See bug 1453456.  So for now, just assert and leave the
 | |
|   // entire function non-constexpr.
 | |
|   MOZ_ASSERT('a' <= uc && uc <= 'z',
 | |
|              "non-ASCII alphanumeric character can't be converted to number");
 | |
|   return uc - 'a' + 10;
 | |
| }
 | |
| 
 | |
| }  // namespace mozilla
 | |
| 
 | |
| #endif /* mozilla_TextUtils_h */
 |