forked from mirrors/gecko-dev
		
	 3a52652295
			
		
	
	
		3a52652295
		
	
	
	
	
		
			
			MozReview-Commit-ID: 9uG6j8UdfKR Differential Revision: https://phabricator.services.mozilla.com/D3413 --HG-- extra : moz-landing-system : lando
		
			
				
	
	
		
			834 lines
		
	
	
	
		
			27 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			834 lines
		
	
	
	
		
			27 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | |
| /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| // IWYU pragma: private, include "nsString.h"
 | |
| 
 | |
| #ifndef nsReadableUtils_h___
 | |
| #define nsReadableUtils_h___
 | |
| 
 | |
| /**
 | |
|  * I guess all the routines in this file are mis-named.
 | |
|  * According to our conventions, they should be |NS_xxx|.
 | |
|  */
 | |
| 
 | |
| #include "mozilla/Assertions.h"
 | |
| #include "nsAString.h"
 | |
| 
 | |
| #include "nsTArrayForwardDeclare.h"
 | |
| 
 | |
// mozilla/Encoding.h can't be included here, so the C entry points this
// header relies on are declared by hand. Their implementations are in the
// encoding_rs and encoding_glue crates.
extern "C" {

// Validity / range checks.
size_t encoding_utf8_valid_up_to(const uint8_t* buffer, size_t buffer_len);
bool encoding_mem_is_ascii(const uint8_t* buffer, size_t buffer_len);
bool encoding_mem_is_basic_latin(const char16_t* buffer, size_t buffer_len);
bool encoding_mem_is_utf8_latin1(const uint8_t* buffer, size_t buffer_len);
bool encoding_mem_is_str_latin1(const uint8_t* buffer, size_t buffer_len);
bool encoding_mem_is_utf16_latin1(const char16_t* buffer, size_t buffer_len);
size_t encoding_mem_utf16_valid_up_to(const char16_t* buffer,
                                      size_t buffer_len);

// In-place repair.
void encoding_mem_ensure_utf16_validity(char16_t* buffer, size_t buffer_len);

// Buffer-to-buffer conversions.
void encoding_mem_convert_utf16_to_latin1_lossy(const char16_t* src,
                                                size_t src_len,
                                                char* dst,
                                                size_t dst_len);
size_t encoding_mem_convert_utf8_to_latin1_lossy(const char* src,
                                                 size_t src_len,
                                                 char* dst,
                                                 size_t dst_len);
void encoding_mem_convert_latin1_to_utf16(const char* src,
                                          size_t src_len,
                                          char16_t* dst,
                                          size_t dst_len);
size_t encoding_mem_convert_utf16_to_utf8(const char16_t* src,
                                          size_t src_len,
                                          char* dst,
                                          size_t dst_len);
size_t encoding_mem_convert_utf8_to_utf16(const char* src,
                                          size_t src_len,
                                          char16_t* dst,
                                          size_t dst_len);

} // extern "C"
 | |
| 
 | |
| // From the nsstring crate
 | |
| extern "C" {
 | |
|   bool
 | |
|   nsstring_fallible_append_utf8_impl(nsAString* aThis,
 | |
|                                      const char* aOther,
 | |
|                                      size_t aOtherLen,
 | |
|                                      size_t aOldLen);
 | |
| 
 | |
|   bool
 | |
|   nsstring_fallible_append_latin1_impl(nsAString* aThis,
 | |
|                                        const char* aOther,
 | |
|                                        size_t aOtherLen,
 | |
|                                        size_t aOldLen);
 | |
| 
 | |
|   bool
 | |
|   nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis,
 | |
|                                                const char16_t*,
 | |
|                                                size_t aOtherLen,
 | |
|                                                size_t aOldLen);
 | |
| 
 | |
|   bool
 | |
|   nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis,
 | |
|                                                        const char16_t*,
 | |
|                                                        size_t aOtherLen,
 | |
|                                                        size_t aOldLen);
 | |
| 
 | |
|   bool
 | |
|   nscstring_fallible_append_utf8_to_latin1_lossy_check(nsACString* aThis,
 | |
|                                                        const nsACString* aOther,
 | |
|                                                        size_t aOldLen);
 | |
| 
 | |
|   bool
 | |
|   nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis,
 | |
|                                                  const nsACString* aOther,
 | |
|                                                  size_t aOldLen);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * If all the code points in the input are below U+0100, converts to Latin1,
 | |
|  * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
 | |
|  * there are code points above U+00FF, produces garbage in a memory-safe way
 | |
|  * and will likely start asserting in future debug builds. The nature of the
 | |
|  * garbage depends on the CPU architecture and must not be relied upon.
 | |
|  *
 | |
|  * The length of aDest must be not be less than the length of aSource.
 | |
|  */
 | |
| inline void
 | |
| LossyConvertUTF16toLatin1(mozilla::Span<const char16_t> aSource,
 | |
|                           mozilla::Span<char> aDest)
 | |
| {
 | |
|   encoding_mem_convert_utf16_to_latin1_lossy(
 | |
|     aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * If all the code points in the input are below U+0100, converts to Latin1,
 | |
|  * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
 | |
|  * there are code points above U+00FF, asserts in debug builds and produces
 | |
|  * garbage in memory-safe way in release builds. The nature of the garbage
 | |
|  * may depend on the CPU architecture and must not be relied upon.
 | |
|  *
 | |
|  * The length of aDest must be not be less than the length of aSource.
 | |
|  */
 | |
| inline size_t
 | |
| LossyConvertUTF8toLatin1(mozilla::Span<const char> aSource,
 | |
|                          mozilla::Span<char> aDest)
 | |
| {
 | |
|   return encoding_mem_convert_utf8_to_latin1_lossy(
 | |
|     aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Interprets unsigned byte value as Unicode scalar value (i.e. not
 | |
|  * windows-1252!).
 | |
|  *
 | |
|  * The length of aDest must be not be less than the length of aSource.
 | |
|  */
 | |
| inline void
 | |
| ConvertLatin1toUTF16(mozilla::Span<const char> aSource,
 | |
|                      mozilla::Span<char16_t> aDest)
 | |
| {
 | |
|   encoding_mem_convert_latin1_to_utf16(
 | |
|     aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Lone surrogates are replaced with the REPLACEMENT CHARACTER.
 | |
|  *
 | |
|  * The length of aDest must be at least the length of aSource times three
 | |
|  * _plus one_.
 | |
|  *
 | |
|  * Returns the number of code units written.
 | |
|  */
 | |
| inline size_t
 | |
| ConvertUTF16toUTF8(mozilla::Span<const char16_t> aSource,
 | |
|                    mozilla::Span<char> aDest)
 | |
| {
 | |
|   return encoding_mem_convert_utf16_to_utf8(
 | |
|     aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
 | |
|  *
 | |
|  * The length of aDest must at least one greater than the length of aSource.
 | |
|  *
 | |
|  * Returns the number of code units written.
 | |
|  */
 | |
| inline size_t
 | |
| ConvertUTF8toUTF16(mozilla::Span<const char> aSource,
 | |
|                    mozilla::Span<char16_t> aDest)
 | |
| {
 | |
|   return encoding_mem_convert_utf8_to_utf16(
 | |
|     aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 | |
| }
 | |
| 
 | |
| inline size_t
 | |
| Distance(const nsReadingIterator<char16_t>& aStart,
 | |
|          const nsReadingIterator<char16_t>& aEnd)
 | |
| {
 | |
|   MOZ_ASSERT(aStart.get() <= aEnd.get());
 | |
|   return static_cast<size_t>(aEnd.get() - aStart.get());
 | |
| }
 | |
| 
 | |
| inline size_t
 | |
| Distance(const nsReadingIterator<char>& aStart,
 | |
|          const nsReadingIterator<char>& aEnd)
 | |
| {
 | |
|   MOZ_ASSERT(aStart.get() <= aEnd.get());
 | |
|   return static_cast<size_t>(aEnd.get() - aStart.get());
 | |
| }
 | |
| 
 | |
| // UTF-8 to UTF-16
 | |
| // Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| CopyUTF8toUTF16(mozilla::Span<const char> aSource,
 | |
|                 nsAString& aDest,
 | |
|                 const mozilla::fallible_t&)
 | |
| {
 | |
|   return nsstring_fallible_append_utf8_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), 0);
 | |
| }
 | |
| 
 | |
| inline void
 | |
| CopyUTF8toUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| AppendUTF8toUTF16(mozilla::Span<const char> aSource,
 | |
|                   nsAString& aDest,
 | |
|                   const mozilla::fallible_t&)
 | |
| {
 | |
|   return nsstring_fallible_append_utf8_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
 | |
| }
 | |
| 
 | |
| inline void
 | |
| AppendUTF8toUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aDest.Length() + aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Latin1 to UTF-16
 | |
| // Interpret each incoming unsigned byte value as a Unicode scalar value (not
 | |
| // windows-1252!). The function names say "ASCII" instead of "Latin1" for
 | |
| // legacy reasons.
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| CopyASCIItoUTF16(mozilla::Span<const char> aSource,
 | |
|                  nsAString& aDest,
 | |
|                  const mozilla::fallible_t&)
 | |
| {
 | |
|   return nsstring_fallible_append_latin1_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), 0);
 | |
| }
 | |
| 
 | |
| inline void
 | |
| CopyASCIItoUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| AppendASCIItoUTF16(mozilla::Span<const char> aSource,
 | |
|                    nsAString& aDest,
 | |
|                    const mozilla::fallible_t&)
 | |
| {
 | |
|   return nsstring_fallible_append_latin1_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
 | |
| }
 | |
| 
 | |
| inline void
 | |
| AppendASCIItoUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aDest.Length() + aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| // UTF-16 to UTF-8
 | |
| // Unpaired surrogates are replaced with the REPLACEMENT CHARACTER.
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource,
 | |
|                 nsACString& aDest,
 | |
|                 const mozilla::fallible_t&)
 | |
| {
 | |
|   return nscstring_fallible_append_utf16_to_utf8_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), 0);
 | |
| }
 | |
| 
 | |
| inline void
 | |
| CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, nsACString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource,
 | |
|                   nsACString& aDest,
 | |
|                   const mozilla::fallible_t&)
 | |
| {
 | |
|   return nscstring_fallible_append_utf16_to_utf8_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
 | |
| }
 | |
| 
 | |
| inline void
 | |
| AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource, nsACString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aDest.Length() + aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| // UTF-16 to Latin1
 | |
| // If all code points in the input are below U+0100, represents each scalar
 | |
| // value as an unsigned byte. (This is not windows-1252!) If there are code
 | |
| // points above U+00FF, memory-safely produces garbage and will likely start
 | |
| // asserting in future debug builds. The nature of the garbage may differ
 | |
| // based on CPU architecture and must not be relied upon. The names say 
 | |
| // "ASCII" instead of "Latin1" for legacy reasons.
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource,
 | |
|                       nsACString& aDest,
 | |
|                       const mozilla::fallible_t&)
 | |
| {
 | |
|   return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), 0);
 | |
| }
 | |
| 
 | |
| inline void
 | |
| LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource, nsACString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| inline MOZ_MUST_USE bool
 | |
| LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
 | |
|                         nsACString& aDest,
 | |
|                         const mozilla::fallible_t&)
 | |
| {
 | |
|   return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
 | |
|     &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
 | |
| }
 | |
| 
 | |
| inline void
 | |
| LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
 | |
|                         nsACString& aDest)
 | |
| {
 | |
|   if (MOZ_UNLIKELY(
 | |
|         !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) {
 | |
|     aDest.AllocFailed(aDest.Length() + aSource.Length());
 | |
|   }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
 | |
|  *
 | |
|  * Allocates and returns a new |char| buffer which you must free with |free|.
 | |
|  * Performs a conversion with LossyConvertUTF16toLatin1() writing into the
 | |
|  * newly-allocated buffer.
 | |
|  *
 | |
|  * The new buffer is zero-terminated, but that may not help you if |aSource|
 | |
|  * contains embedded nulls.
 | |
|  *
 | |
|  * @param aSource a 16-bit wide string
 | |
|  * @return a new |char| buffer you must free with |free|.
 | |
|  */
 | |
| char* ToNewCString(const nsAString& aSource);
 | |
| 
 | |
| /**
 | |
|  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
 | |
|  *
 | |
|  * Allocates and returns a new |char| buffer which you must free with |free|.
 | |
|  *
 | |
|  * The new buffer is zero-terminated, but that may not help you if |aSource|
 | |
|  * contains embedded nulls.
 | |
|  *
 | |
|  * @param aSource an 8-bit wide string
 | |
|  * @return a new |char| buffer you must free with |free|.
 | |
|  */
 | |
| char* ToNewCString(const nsACString& aSource);
 | |
| 
 | |
| /**
 | |
|  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
 | |
|  *
 | |
|  * Allocates and returns a new |char| buffer which you must free with
 | |
|  * |free|.
 | |
|  * Performs an encoding conversion from a UTF-16 string to a UTF-8 string with
 | |
|  * unpaired surrogates replaced with the REPLACEMENT CHARACTER copying
 | |
|  * |aSource| to your new buffer.
 | |
|  *
 | |
|  * The new buffer is zero-terminated, but that may not help you if |aSource|
 | |
|  * contains embedded nulls.
 | |
|  *
 | |
|  * @param aSource a UTF-16 string (made of char16_t's)
 | |
|  * @param aUTF8Count the number of 8-bit units that was returned
 | |
|  * @return a new |char| buffer you must free with |free|.
 | |
|  */
 | |
| 
 | |
| char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr);
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * Returns a new |char16_t| buffer containing a zero-terminated copy of
 | |
|  * |aSource|.
 | |
|  *
 | |
|  * Allocates and returns a new |char16_t| buffer which you must free with
 | |
|  * |free|.
 | |
|  *
 | |
|  * The new buffer is zero-terminated, but that may not help you if |aSource|
 | |
|  * contains embedded nulls.
 | |
|  *
 | |
|  * @param aSource a UTF-16 string
 | |
|  * @return a new |char16_t| buffer you must free with |free|.
 | |
|  */
 | |
| char16_t* ToNewUnicode(const nsAString& aSource);
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * Returns a new |char16_t| buffer containing a zero-terminated copy of
 | |
|  * |aSource|.
 | |
|  *
 | |
|  * Allocates and returns a new |char16_t| buffer which you must free with
 | |
|  * |free|.
 | |
|  *
 | |
|  * Performs an encoding conversion by 0-padding 8-bit wide characters up to
 | |
|  * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource|
 | |
|  * to your new buffer.
 | |
|  *
 | |
|  * The new buffer is zero-terminated, but that may not help you if |aSource|
 | |
|  * contains embedded nulls.
 | |
|  *
 | |
|  * @param aSource a Latin1 string
 | |
|  * @return a new |char16_t| buffer you must free with |free|.
 | |
|  */
 | |
| char16_t* ToNewUnicode(const nsACString& aSource);
 | |
| 
 | |
| /**
 | |
|  * Returns a new |char16_t| buffer containing a zero-terminated copy
 | |
|  * of |aSource|.
 | |
|  *
 | |
|  * Allocates and returns a new |char| buffer which you must free with
 | |
|  * |free|.  Performs an encoding conversion from UTF-8 to UTF-16
 | |
|  * while copying |aSource| to your new buffer.  Malformed byte sequences
 | |
|  * are replaced with the REPLACEMENT CHARACTER.
 | |
|  *
 | |
|  * The new buffer is zero-terminated, but that may not help you if |aSource|
 | |
|  * contains embedded nulls.
 | |
|  *
 | |
|  * @param aSource an 8-bit wide string, UTF-8 encoded
 | |
|  * @param aUTF16Count the number of 16-bit units that was returned
 | |
|  * @return a new |char16_t| buffer you must free with |free|.
 | |
|  *         (UTF-16 encoded)
 | |
|  */
 | |
| char16_t* UTF8ToNewUnicode(const nsACString& aSource,
 | |
|                            uint32_t* aUTF16Count = nullptr);
 | |
| 
 | |
| /**
 | |
|  * Copies |aLength| 16-bit code units from the start of |aSource| to the
 | |
|  * |char16_t| buffer |aDest|.
 | |
|  *
 | |
|  * After this operation |aDest| is not null terminated.
 | |
|  *
 | |
|  * @param aSource a UTF-16 string
 | |
|  * @param aSrcOffset start offset in the source string
 | |
|  * @param aDest a |char16_t| buffer
 | |
|  * @param aLength the number of 16-bit code units to copy
 | |
|  * @return pointer to destination buffer - identical to |aDest|
 | |
|  */
 | |
| char16_t* CopyUnicodeTo(const nsAString& aSource,
 | |
|                         uint32_t aSrcOffset,
 | |
|                         char16_t* aDest,
 | |
|                         uint32_t aLength);
 | |
| 
 | |
| /**
 | |
|  * Returns |true| if |aString| contains only ASCII characters, that is,
 | |
|  * characters in the range (0x00, 0x7F).
 | |
|  *
 | |
|  * @param aString a 16-bit wide string to scan
 | |
|  */
 | |
| inline bool
 | |
| IsASCII(mozilla::Span<const char16_t> aString)
 | |
| {
 | |
|   size_t length = aString.Length();
 | |
|   const char16_t* ptr = aString.Elements();
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < 16) {
 | |
|     char16_t accu = 0;
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       accu |= ptr[i];
 | |
|     }
 | |
|     return accu < 0x80U;
 | |
|   }
 | |
|   return encoding_mem_is_basic_latin(ptr, length);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| if |aString| contains only ASCII characters, that is,
 | |
|  * characters in the range (0x00, 0x7F).
 | |
|  *
 | |
|  * @param aString a 8-bit wide string to scan
 | |
|  */
 | |
| inline bool
 | |
| IsASCII(mozilla::Span<const char> aString)
 | |
| {
 | |
|   size_t length = aString.Length();
 | |
|   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < 16) {
 | |
|     uint8_t accu = 0;
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       accu |= ptr[i];
 | |
|     }
 | |
|     return accu < 0x80U;
 | |
|   }
 | |
|   return encoding_mem_is_ascii(ptr, length);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| if |aString| contains only Latin1 characters, that is,
 | |
|  * characters in the range (U+0000, U+00FF).
 | |
|  *
 | |
|  * @param aString a potentially-invalid UTF-16 string to scan
 | |
|  */
 | |
| inline bool
 | |
| IsUTF16Latin1(mozilla::Span<const char16_t> aString)
 | |
| {
 | |
|   size_t length = aString.Length();
 | |
|   const char16_t* ptr = aString.Elements();
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < 16) {
 | |
|     char16_t accu = 0;
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       accu |= ptr[i];
 | |
|     }
 | |
|     return accu < 0x100U;
 | |
|   }
 | |
|   return encoding_mem_is_utf16_latin1(ptr, length);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| if |aString| contains only Latin1 characters, that is,
 | |
|  * characters in the range (U+0000, U+00FF).
 | |
|  *
 | |
|  * If you know that the argument is always absolutely guaranteed to be valid
 | |
|  * UTF-8, use the faster UnsafeIsValidUTF8Latin1() instead.
 | |
|  *
 | |
|  * @param aString potentially-invalid UTF-8 string to scan
 | |
|  */
 | |
| inline bool
 | |
| IsUTF8Latin1(mozilla::Span<const char> aString)
 | |
| {
 | |
|   size_t length = aString.Length();
 | |
|   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < 16) {
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       if (ptr[i] >= 0x80U) {
 | |
|         ptr += i;
 | |
|         length -= i;
 | |
|         // This loop can't handle non-ASCII, but the Rust code can, so
 | |
|         // upon seeing non-ASCII, break the loop and let the Rust code
 | |
|         // handle the rest of the buffer (including the non-ASCII byte).
 | |
|         goto end;
 | |
|       }
 | |
|     }
 | |
|     return true;
 | |
|   }
 | |
| end:
 | |
|   return encoding_mem_is_utf8_latin1(ptr, length);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| if |aString| contains only Latin1 characters, that is,
 | |
|  * characters in the range (U+0000, U+00FF).
 | |
|  *
 | |
|  * The argument MUST be valid UTF-8. If you are at all unsure, use IsUTF8Latin1
 | |
|  * instead!
 | |
|  *
 | |
|  * @param aString known-valid UTF-8 string to scan
 | |
|  */
 | |
| inline bool
 | |
| UnsafeIsValidUTF8Latin1(mozilla::Span<const char> aString)
 | |
| {
 | |
|   size_t length = aString.Length();
 | |
|   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < 16) {
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       if (ptr[i] >= 0x80U) {
 | |
|         ptr += i;
 | |
|         length -= i;
 | |
|         goto end;
 | |
|       }
 | |
|     }
 | |
|     return true;
 | |
|   }
 | |
| end:
 | |
|   return encoding_mem_is_str_latin1(ptr, length);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns |true| if |aString| is a valid UTF-8 string.
 | |
|  *
 | |
|  * Note that this doesn't check whether the string might look like a valid
 | |
|  * string in another encoding, too, e.g. ISO-2022-JP.
 | |
|  *
 | |
|  * @param aString an 8-bit wide string to scan
 | |
|  */
 | |
| inline bool
 | |
| IsUTF8(mozilla::Span<const char> aString)
 | |
| {
 | |
|   size_t length = aString.Length();
 | |
|   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
 | |
|   // For short strings, calling into Rust is a pessimization, and the SIMD
 | |
|   // code won't have a chance to kick in anyway.
 | |
|   if (length < 16) {
 | |
|     for (size_t i = 0; i < length; i++) {
 | |
|       if (ptr[i] >= 0x80U) {
 | |
|         ptr += i;
 | |
|         length -= i;
 | |
|         goto end;
 | |
|       }
 | |
|     }
 | |
|     return true;
 | |
|   }
 | |
|   end:
 | |
|   return length == encoding_utf8_valid_up_to(ptr, length);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Returns the index of the first unpaired surrogate or
 | |
|  * the length of the string if there are none.
 | |
|  */
 | |
| inline uint32_t
 | |
| UTF16ValidUpTo(mozilla::Span<const char16_t> aString)
 | |
| {
 | |
|   return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Replaces unpaired surrogates with U+FFFD in the argument.
 | |
|  */
 | |
| inline void
 | |
| EnsureUTF16ValiditySpan(mozilla::Span<char16_t> aString)
 | |
| {
 | |
|   encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length());
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Replaces unpaired surrogates with U+FFFD in the argument.
 | |
|  *
 | |
|  * Copies a shared string buffer or an otherwise read-only
 | |
|  * buffer only if there are unpaired surrogates.
 | |
|  */
 | |
| inline void
 | |
| EnsureUTF16Validity(nsAString& aString)
 | |
| {
 | |
|   uint32_t upTo = UTF16ValidUpTo(aString);
 | |
|   uint32_t len = aString.Length();
 | |
|   if (upTo == len) {
 | |
|     return;
 | |
|   }
 | |
|   char16_t* ptr = aString.BeginWriting();
 | |
|   auto span = mozilla::MakeSpan(ptr, len);
 | |
|   span[upTo] = 0xFFFD;
 | |
|   EnsureUTF16ValiditySpan(span.From(upTo + 1));
 | |
| }
 | |
| 
 | |
| bool ParseString(const nsACString& aAstring, char aDelimiter,
 | |
|                  nsTArray<nsCString>& aArray);
 | |
| 
 | |
| /**
 | |
|  * Converts case in place in the argument string.
 | |
|  */
 | |
| void ToUpperCase(nsACString&);
 | |
| 
 | |
| void ToLowerCase(nsACString&);
 | |
| 
 | |
| void ToUpperCase(nsACString&);
 | |
| 
 | |
| void ToLowerCase(nsACString&);
 | |
| 
 | |
| /**
 | |
|  * Converts case from string aSource to aDest.
 | |
|  */
 | |
| void ToUpperCase(const nsACString& aSource, nsACString& aDest);
 | |
| 
 | |
| void ToLowerCase(const nsACString& aSource, nsACString& aDest);
 | |
| 
 | |
| /**
 | |
|  * Finds the leftmost occurrence of |aPattern|, if any in the range
 | |
|  * |aSearchStart|..|aSearchEnd|.
 | |
|  *
 | |
|  * Returns |true| if a match was found, and adjusts |aSearchStart| and
 | |
|  * |aSearchEnd| to point to the match.  If no match was found, returns |false|
 | |
|  * and makes |aSearchStart == aSearchEnd|.
 | |
|  *
 | |
|  * Currently, this is equivalent to the O(m*n) implementation previously on
 | |
|  * |ns[C]String|.
 | |
|  *
 | |
|  * If we need something faster, then we can implement that later.
 | |
|  */
 | |
| 
 | |
| bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
 | |
|                     nsAString::const_iterator&,
 | |
|                     const nsStringComparator& = nsDefaultStringComparator());
 | |
| bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
 | |
|                     nsACString::const_iterator&,
 | |
|                     const nsCStringComparator& = nsDefaultCStringComparator());
 | |
| 
 | |
| /* sometimes we don't care about where the string was, just that we
 | |
|  * found it or not */
 | |
| inline bool
 | |
| FindInReadable(const nsAString& aPattern, const nsAString& aSource,
 | |
|                const nsStringComparator& aCompare = nsDefaultStringComparator())
 | |
| {
 | |
|   nsAString::const_iterator start, end;
 | |
|   aSource.BeginReading(start);
 | |
|   aSource.EndReading(end);
 | |
|   return FindInReadable(aPattern, start, end, aCompare);
 | |
| }
 | |
| 
 | |
| inline bool
 | |
| FindInReadable(const nsACString& aPattern, const nsACString& aSource,
 | |
|                const nsCStringComparator& aCompare = nsDefaultCStringComparator())
 | |
| {
 | |
|   nsACString::const_iterator start, end;
 | |
|   aSource.BeginReading(start);
 | |
|   aSource.EndReading(end);
 | |
|   return FindInReadable(aPattern, start, end, aCompare);
 | |
| }
 | |
| 
 | |
| 
 | |
| bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
 | |
|                                    nsACString::const_iterator&,
 | |
|                                    nsACString::const_iterator&);
 | |
| 
 | |
| /**
 | |
|  * Finds the rightmost occurrence of |aPattern|
 | |
|  * Returns |true| if a match was found, and adjusts |aSearchStart| and
 | |
|  * |aSearchEnd| to point to the match.  If no match was found, returns |false|
 | |
|  * and makes |aSearchStart == aSearchEnd|.
 | |
|  */
 | |
| bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
 | |
|                      nsAString::const_iterator&,
 | |
|                      const nsStringComparator& = nsDefaultStringComparator());
 | |
| bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
 | |
|                      nsACString::const_iterator&,
 | |
|                      const nsCStringComparator& = nsDefaultCStringComparator());
 | |
| 
 | |
| /**
 | |
| * Finds the leftmost occurrence of |aChar|, if any in the range
 | |
| * |aSearchStart|..|aSearchEnd|.
 | |
| *
 | |
| * Returns |true| if a match was found, and adjusts |aSearchStart| to
 | |
| * point to the match.  If no match was found, returns |false| and
 | |
| * makes |aSearchStart == aSearchEnd|.
 | |
| */
 | |
| bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
 | |
|                         const nsAString::const_iterator& aSearchEnd);
 | |
| bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
 | |
|                         const nsACString::const_iterator& aSearchEnd);
 | |
| 
 | |
| /**
 | |
| * Finds the number of occurences of |aChar| in the string |aStr|
 | |
| */
 | |
| uint32_t CountCharInReadable(const nsAString& aStr,
 | |
|                              char16_t aChar);
 | |
| uint32_t CountCharInReadable(const nsACString& aStr,
 | |
|                              char aChar);
 | |
| 
 | |
| bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring);
 | |
| bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
 | |
|                       const nsStringComparator& aComparator);
 | |
| bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring);
 | |
| bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
 | |
|                       const nsCStringComparator& aComparator);
 | |
| bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring);
 | |
| bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
 | |
|                     const nsStringComparator& aComparator);
 | |
| bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring);
 | |
| bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
 | |
|                     const nsCStringComparator& aComparator);
 | |
| 
 | |
| const nsString& EmptyString();
 | |
| const nsCString& EmptyCString();
 | |
| 
 | |
| const nsString& VoidString();
 | |
| const nsCString& VoidCString();
 | |
| 
 | |
| /**
 | |
|  * Compare a UTF-8 string to an UTF-16 string.
 | |
|  *
 | |
|  * Returns 0 if the strings are equal, -1 if aUTF8String is less
 | |
|  * than aUTF16Count, and 1 in the reverse case. Errors are replaced
 | |
|  * with U+FFFD and then the U+FFFD is compared as if it had occurred
 | |
|  * in the input. If aErr is not nullptr, *aErr is set to true if
 | |
|  * either string had malformed sequences.
 | |
|  */
 | |
| int32_t
 | |
| CompareUTF8toUTF16(const nsACString& aUTF8String,
 | |
|                    const nsAString& aUTF16String,
 | |
|                    bool* aErr = nullptr);
 | |
| 
 | |
| void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
 | |
| 
 | |
/**
 * Asks aStr to take on length aLen via SetLength() and reports whether its
 * Length() equals aLen afterwards.
 */
template<class T>
inline bool
EnsureStringLength(T& aStr, uint32_t aLen)
{
  aStr.SetLength(aLen);
  const bool reachedLength = aStr.Length() == aLen;
  return reachedLength;
}
 | |
| 
 | |
| #endif // !defined(nsReadableUtils_h___)
 |