forked from mirrors/gecko-dev
		
	 265e672179
			
		
	
	
		265e672179
		
	
	
	
	
		
			
			# ignore-this-changeset --HG-- extra : amend_source : 4d301d3b0b8711c4692392aa76088ba7fd7d1022
		
			
				
	
	
		
			452 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			452 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 | |
| /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| #include "nsLinebreakConverter.h"
 | |
| 
 | |
| #include "nsMemory.h"
 | |
| #include "nsCRT.h"
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   GetLinebreakString
 | |
| 
 | |
|   Could make this inline
 | |
| ----------------------------------------------------------------------------*/
 | |
| static const char* GetLinebreakString(
 | |
|     nsLinebreakConverter::ELinebreakType aBreakType) {
 | |
|   static const char* const sLinebreaks[] = {"",            // any
 | |
|                                             NS_LINEBREAK,  // platform
 | |
|                                             LFSTR,         // content
 | |
|                                             CRLF,          // net
 | |
|                                             CRSTR,         // Mac
 | |
|                                             LFSTR,         // Unix
 | |
|                                             CRLF,          // Windows
 | |
|                                             " ",           // space
 | |
|                                             nullptr};
 | |
| 
 | |
|   return sLinebreaks[aBreakType];
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   AppendLinebreak
 | |
| 
 | |
|   Wee inline method to append a line break. Modifies ioDest.
 | |
| ----------------------------------------------------------------------------*/
 | |
| template <class T>
 | |
| void AppendLinebreak(T*& aIoDest, const char* aLineBreakStr) {
 | |
|   *aIoDest++ = *aLineBreakStr;
 | |
| 
 | |
|   if (aLineBreakStr[1]) {
 | |
|     *aIoDest++ = aLineBreakStr[1];
 | |
|   }
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   CountChars
 | |
| 
 | |
|   Counts occurrences of breakStr in aSrc
 | |
| ----------------------------------------------------------------------------*/
 | |
| template <class T>
 | |
| int32_t CountLinebreaks(const T* aSrc, int32_t aInLen, const char* aBreakStr) {
 | |
|   const T* src = aSrc;
 | |
|   const T* srcEnd = aSrc + aInLen;
 | |
|   int32_t theCount = 0;
 | |
| 
 | |
|   while (src < srcEnd) {
 | |
|     if (*src == *aBreakStr) {
 | |
|       src++;
 | |
| 
 | |
|       if (aBreakStr[1]) {
 | |
|         if (src < srcEnd && *src == aBreakStr[1]) {
 | |
|           src++;
 | |
|           theCount++;
 | |
|         }
 | |
|       } else {
 | |
|         theCount++;
 | |
|       }
 | |
|     } else {
 | |
|       src++;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return theCount;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertBreaks
 | |
| 
 | |
|   ioLen *includes* a terminating null, if any
 | |
| ----------------------------------------------------------------------------*/
 | |
| template <class T>
 | |
| static T* ConvertBreaks(const T* aInSrc, int32_t& aIoLen, const char* aSrcBreak,
 | |
|                         const char* aDestBreak) {
 | |
|   NS_ASSERTION(aInSrc && aSrcBreak && aDestBreak, "Got a null string");
 | |
| 
 | |
|   T* resultString = nullptr;
 | |
| 
 | |
|   // handle the no conversion case
 | |
|   if (nsCRT::strcmp(aSrcBreak, aDestBreak) == 0) {
 | |
|     resultString = (T*)malloc(sizeof(T) * aIoLen);
 | |
|     if (!resultString) {
 | |
|       return nullptr;
 | |
|     }
 | |
|     memcpy(resultString, aInSrc,
 | |
|            sizeof(T) * aIoLen);  // includes the null, if any
 | |
|     return resultString;
 | |
|   }
 | |
| 
 | |
|   int32_t srcBreakLen = strlen(aSrcBreak);
 | |
|   int32_t destBreakLen = strlen(aDestBreak);
 | |
| 
 | |
|   // handle the easy case, where the string length does not change, and the
 | |
|   // breaks are only 1 char long, i.e. CR <-> LF
 | |
|   if (srcBreakLen == destBreakLen && srcBreakLen == 1) {
 | |
|     resultString = (T*)malloc(sizeof(T) * aIoLen);
 | |
|     if (!resultString) {
 | |
|       return nullptr;
 | |
|     }
 | |
| 
 | |
|     const T* src = aInSrc;
 | |
|     const T* srcEnd = aInSrc + aIoLen;  // includes null, if any
 | |
|     T* dst = resultString;
 | |
| 
 | |
|     char srcBreakChar = *aSrcBreak;  // we know it's one char long already
 | |
|     char dstBreakChar = *aDestBreak;
 | |
| 
 | |
|     while (src < srcEnd) {
 | |
|       if (*src == srcBreakChar) {
 | |
|         *dst++ = dstBreakChar;
 | |
|         src++;
 | |
|       } else {
 | |
|         *dst++ = *src++;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // aIoLen does not change
 | |
|   } else {
 | |
|     // src and dest termination is different length. Do it a slower way.
 | |
| 
 | |
|     // count linebreaks in src. Assumes that chars in 2-char linebreaks are
 | |
|     // unique.
 | |
|     int32_t numLinebreaks = CountLinebreaks(aInSrc, aIoLen, aSrcBreak);
 | |
| 
 | |
|     int32_t newBufLen =
 | |
|         aIoLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
 | |
|     resultString = (T*)malloc(sizeof(T) * newBufLen);
 | |
|     if (!resultString) {
 | |
|       return nullptr;
 | |
|     }
 | |
| 
 | |
|     const T* src = aInSrc;
 | |
|     const T* srcEnd = aInSrc + aIoLen;  // includes null, if any
 | |
|     T* dst = resultString;
 | |
| 
 | |
|     while (src < srcEnd) {
 | |
|       if (*src == *aSrcBreak) {
 | |
|         *dst++ = *aDestBreak;
 | |
|         if (aDestBreak[1]) {
 | |
|           *dst++ = aDestBreak[1];
 | |
|         }
 | |
| 
 | |
|         src++;
 | |
|         if (src < srcEnd && aSrcBreak[1] && *src == aSrcBreak[1]) {
 | |
|           src++;
 | |
|         }
 | |
|       } else {
 | |
|         *dst++ = *src++;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     aIoLen = newBufLen;
 | |
|   }
 | |
| 
 | |
|   return resultString;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertBreaksInSitu
 | |
| 
 | |
|   Convert breaks in situ. Can only do this if the linebreak length
 | |
|   does not change.
 | |
| ----------------------------------------------------------------------------*/
 | |
| template <class T>
 | |
| static void ConvertBreaksInSitu(T* aInSrc, int32_t aInLen, char aSrcBreak,
 | |
|                                 char aDestBreak) {
 | |
|   T* src = aInSrc;
 | |
|   T* srcEnd = aInSrc + aInLen;
 | |
| 
 | |
|   while (src < srcEnd) {
 | |
|     if (*src == aSrcBreak) {
 | |
|       *src = aDestBreak;
 | |
|     }
 | |
| 
 | |
|     src++;
 | |
|   }
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertUnknownBreaks
 | |
| 
 | |
|   Convert unknown line breaks to the specified break.
 | |
| 
 | |
|   This will convert CRLF pairs to one break, and single CR or LF to a break.
 | |
| ----------------------------------------------------------------------------*/
 | |
| template <class T>
 | |
| static T* ConvertUnknownBreaks(const T* aInSrc, int32_t& aIoLen,
 | |
|                                const char* aDestBreak) {
 | |
|   const T* src = aInSrc;
 | |
|   const T* srcEnd = aInSrc + aIoLen;  // includes null, if any
 | |
| 
 | |
|   int32_t destBreakLen = strlen(aDestBreak);
 | |
|   int32_t finalLen = 0;
 | |
| 
 | |
|   while (src < srcEnd) {
 | |
|     if (*src == nsCRT::CR) {
 | |
|       if (src < srcEnd && src[1] == nsCRT::LF) {
 | |
|         // CRLF
 | |
|         finalLen += destBreakLen;
 | |
|         src++;
 | |
|       } else {
 | |
|         // Lone CR
 | |
|         finalLen += destBreakLen;
 | |
|       }
 | |
|     } else if (*src == nsCRT::LF) {
 | |
|       // Lone LF
 | |
|       finalLen += destBreakLen;
 | |
|     } else {
 | |
|       finalLen++;
 | |
|     }
 | |
|     src++;
 | |
|   }
 | |
| 
 | |
|   T* resultString = (T*)malloc(sizeof(T) * finalLen);
 | |
|   if (!resultString) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   src = aInSrc;
 | |
|   srcEnd = aInSrc + aIoLen;  // includes null, if any
 | |
| 
 | |
|   T* dst = resultString;
 | |
| 
 | |
|   while (src < srcEnd) {
 | |
|     if (*src == nsCRT::CR) {
 | |
|       if (src < srcEnd && src[1] == nsCRT::LF) {
 | |
|         // CRLF
 | |
|         AppendLinebreak(dst, aDestBreak);
 | |
|         src++;
 | |
|       } else {
 | |
|         // Lone CR
 | |
|         AppendLinebreak(dst, aDestBreak);
 | |
|       }
 | |
|     } else if (*src == nsCRT::LF) {
 | |
|       // Lone LF
 | |
|       AppendLinebreak(dst, aDestBreak);
 | |
|     } else {
 | |
|       *dst++ = *src;
 | |
|     }
 | |
|     src++;
 | |
|   }
 | |
| 
 | |
|   aIoLen = finalLen;
 | |
|   return resultString;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertLineBreaks
 | |
| 
 | |
| ----------------------------------------------------------------------------*/
 | |
| char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
 | |
|                                               ELinebreakType aSrcBreaks,
 | |
|                                               ELinebreakType aDestBreaks,
 | |
|                                               int32_t aSrcLen,
 | |
|                                               int32_t* aOutLen) {
 | |
|   NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
 | |
|                "Invalid parameter");
 | |
|   if (!aSrc) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
 | |
| 
 | |
|   char* resultString;
 | |
|   if (aSrcBreaks == eLinebreakAny) {
 | |
|     resultString =
 | |
|         ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks));
 | |
|   } else
 | |
|     resultString =
 | |
|         ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks),
 | |
|                       GetLinebreakString(aDestBreaks));
 | |
| 
 | |
|   if (aOutLen) {
 | |
|     *aOutLen = sourceLen;
 | |
|   }
 | |
|   return resultString;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertLineBreaksInSitu
 | |
| 
 | |
| ----------------------------------------------------------------------------*/
 | |
| nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(
 | |
|     char** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
 | |
|     int32_t aSrcLen, int32_t* aOutLen) {
 | |
|   NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed");
 | |
|   if (!aIoBuffer || !*aIoBuffer) {
 | |
|     return NS_ERROR_NULL_POINTER;
 | |
|   }
 | |
| 
 | |
|   NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
 | |
|                "Invalid parameter");
 | |
| 
 | |
|   int32_t sourceLen =
 | |
|       (aSrcLen == kIgnoreLen) ? strlen(*aIoBuffer) + 1 : aSrcLen;
 | |
| 
 | |
|   // can we convert in-place?
 | |
|   const char* srcBreaks = GetLinebreakString(aSrcBreaks);
 | |
|   const char* dstBreaks = GetLinebreakString(aDestBreaks);
 | |
| 
 | |
|   if (aSrcBreaks != eLinebreakAny && strlen(srcBreaks) == 1 &&
 | |
|       strlen(dstBreaks) == 1) {
 | |
|     ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks);
 | |
|     if (aOutLen) {
 | |
|       *aOutLen = sourceLen;
 | |
|     }
 | |
|   } else {
 | |
|     char* destBuffer;
 | |
| 
 | |
|     if (aSrcBreaks == eLinebreakAny) {
 | |
|       destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks);
 | |
|     } else {
 | |
|       destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks);
 | |
|     }
 | |
| 
 | |
|     if (!destBuffer) {
 | |
|       return NS_ERROR_OUT_OF_MEMORY;
 | |
|     }
 | |
|     *aIoBuffer = destBuffer;
 | |
|     if (aOutLen) {
 | |
|       *aOutLen = sourceLen;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NS_OK;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertUnicharLineBreaks
 | |
| 
 | |
| ----------------------------------------------------------------------------*/
 | |
| char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(
 | |
|     const char16_t* aSrc, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
 | |
|     int32_t aSrcLen, int32_t* aOutLen) {
 | |
|   NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
 | |
|                "Invalid parameter");
 | |
|   if (!aSrc) {
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen;
 | |
| 
 | |
|   char16_t* resultString;
 | |
|   if (aSrcBreaks == eLinebreakAny) {
 | |
|     resultString =
 | |
|         ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks));
 | |
|   } else
 | |
|     resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks),
 | |
|                                  GetLinebreakString(aDestBreaks));
 | |
| 
 | |
|   if (aOutLen) {
 | |
|     *aOutLen = bufLen;
 | |
|   }
 | |
|   return resultString;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertStringLineBreaks
 | |
| 
 | |
| ----------------------------------------------------------------------------*/
 | |
| nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(
 | |
|     char16_t** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
 | |
|     int32_t aSrcLen, int32_t* aOutLen) {
 | |
|   NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed");
 | |
|   if (!aIoBuffer || !*aIoBuffer) {
 | |
|     return NS_ERROR_NULL_POINTER;
 | |
|   }
 | |
|   NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
 | |
|                "Invalid parameter");
 | |
| 
 | |
|   int32_t sourceLen =
 | |
|       (aSrcLen == kIgnoreLen) ? NS_strlen(*aIoBuffer) + 1 : aSrcLen;
 | |
| 
 | |
|   // can we convert in-place?
 | |
|   const char* srcBreaks = GetLinebreakString(aSrcBreaks);
 | |
|   const char* dstBreaks = GetLinebreakString(aDestBreaks);
 | |
| 
 | |
|   if ((aSrcBreaks != eLinebreakAny) && (strlen(srcBreaks) == 1) &&
 | |
|       (strlen(dstBreaks) == 1)) {
 | |
|     ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks);
 | |
|     if (aOutLen) {
 | |
|       *aOutLen = sourceLen;
 | |
|     }
 | |
|   } else {
 | |
|     char16_t* destBuffer;
 | |
| 
 | |
|     if (aSrcBreaks == eLinebreakAny) {
 | |
|       destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks);
 | |
|     } else {
 | |
|       destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks);
 | |
|     }
 | |
| 
 | |
|     if (!destBuffer) {
 | |
|       return NS_ERROR_OUT_OF_MEMORY;
 | |
|     }
 | |
|     *aIoBuffer = destBuffer;
 | |
|     if (aOutLen) {
 | |
|       *aOutLen = sourceLen;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return NS_OK;
 | |
| }
 | |
| 
 | |
| /*----------------------------------------------------------------------------
 | |
|   ConvertStringLineBreaks
 | |
| 
 | |
| ----------------------------------------------------------------------------*/
 | |
| nsresult nsLinebreakConverter::ConvertStringLineBreaks(
 | |
|     nsString& aIoString, ELinebreakType aSrcBreaks,
 | |
|     ELinebreakType aDestBreaks) {
 | |
|   NS_ASSERTION(aDestBreaks != eLinebreakAny && aSrcBreaks != eLinebreakSpace,
 | |
|                "Invalid parameter");
 | |
| 
 | |
|   // nothing to do
 | |
|   if (aIoString.IsEmpty()) {
 | |
|     return NS_OK;
 | |
|   }
 | |
| 
 | |
|   nsresult rv;
 | |
| 
 | |
|   // remember the old buffer in case
 | |
|   // we blow it away later
 | |
|   auto stringBuf = aIoString.BeginWriting(mozilla::fallible);
 | |
|   if (!stringBuf) {
 | |
|     return NS_ERROR_OUT_OF_MEMORY;
 | |
|   }
 | |
| 
 | |
|   int32_t newLen;
 | |
| 
 | |
|   rv = ConvertUnicharLineBreaksInSitu(&stringBuf, aSrcBreaks, aDestBreaks,
 | |
|                                       aIoString.Length() + 1, &newLen);
 | |
|   if (NS_FAILED(rv)) {
 | |
|     return rv;
 | |
|   }
 | |
| 
 | |
|   if (stringBuf != aIoString.get()) {
 | |
|     aIoString.Adopt(stringBuf, newLen - 1);
 | |
|   }
 | |
| 
 | |
|   return NS_OK;
 | |
| }
 |