fune/xpcom/string/nsTStringObsolete.cpp
Henri Sivonen 3edc601325 Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. r=Nika,erahm,froydnj.
Correctness improvements:

 * UTF errors are handled safely per spec instead of dangerously truncating
   strings.

 * There are fewer converter implementations.

Performance improvements:

 * The old code did exact buffer length math, which meant doing UTF math twice
   on each input string (once for length calculation and another time for
   conversion). Exact length math is more complicated when handling errors
   properly, which the old code didn't do. The new code does UTF math on the
   string content only once (when converting) but risks allocating more than
   once. There are heuristics in place to lower the probability of
   reallocation in cases where the double math avoidance isn't enough of a
   saving to absorb an allocation and memcpy.

 * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized
   but a single non-ASCII code point pessimized the rest of the string. The
   new code tries to get back on the fast ASCII path.

 * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range
   input to eliminate an operation from the inner loop on x86/x86_64.

 * When assigning to a pre-existing string, the new code tries to reuse the
   old buffer instead of first releasing the old buffer and then allocating a
   new one.

 * When reallocating from the new code, the memcpy covers only the data that
   is part of the logical length of the old string instead of memcpying the
   whole capacity. (For old callers, the old excess-memcpy behavior is preserved
   because of bogus callers. See bug 1472113.)

 * UTF-8 strings in XPConnect that are in the Latin1 range are passed to
   SpiderMonkey as Latin1.

New features:

 * Conversion between UTF-8 and Latin1 is added in order to enable faster
   future interop between Rust code (or otherwise UTF-8-using code) and text
   node and SpiderMonkey code that uses Latin1.

MozReview-Commit-ID: JaJuExfILM9
2018-08-14 14:43:42 +03:00

489 lines
13 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsTArray.h"
#include "nsASCIIMask.h"
#include "mozilla/CheckedInt.h"
/**
 * nsTString::Find
 *
 * Forward search for the 8-bit string aString inside this string.
 *
 * aOffset specifies starting index
 * aCount specifies number of string compares (iterations)
 *
 * Returns the index of the match relative to the beginning of this string,
 * or kNotFound.
 */
template <typename T>
int32_t
nsTString<T>::Find(const nsTString<char>& aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const
{
  // From here on aOffset and aCount no longer carry the caller's meaning:
  // Find_ComputeSearchRange rewrites both of them.
  Find_ComputeSearchRange(this->mLength, aString.Length(), aOffset, aCount);

  const int32_t relative =
    FindSubstring(this->mData + aOffset, aCount, aString.get(), aString.Length(), aIgnoreCase);
  if (relative == kNotFound) {
    return kNotFound;
  }

  // FindSubstring was handed a pointer advanced by aOffset, so translate its
  // answer back into an index into the whole string.
  return relative + aOffset;
}
/**
 * Convenience overload of Find for a raw |const char*| needle: the pointer is
 * wrapped in an nsTDependentString<char> and the search is delegated to the
 * nsTString<char> overload with the remaining arguments unchanged.
 */
template <typename T>
int32_t
nsTString<T>::Find(const char* aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const
{
  const nsTDependentString<char> needle(aString);
  return Find(needle, aIgnoreCase, aOffset, aCount);
}
/**
 * nsTString::RFind
 *
 * Reverse search for the 8-bit string aString inside this string.
 *
 * aOffset specifies starting index
 * aCount specifies number of string compares (iterations)
 *
 * Returns the index of the match relative to the beginning of this string,
 * or kNotFound.
 */
template <typename T>
int32_t
nsTString<T>::RFind(const nsTString<char>& aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const
{
  // From here on aOffset and aCount no longer carry the caller's meaning:
  // RFind_ComputeSearchRange rewrites both of them.
  RFind_ComputeSearchRange(this->mLength, aString.Length(), aOffset, aCount);

  const int32_t relative =
    RFindSubstring(this->mData + aOffset, aCount, aString.get(), aString.Length(), aIgnoreCase);
  if (relative == kNotFound) {
    return kNotFound;
  }

  // RFindSubstring was handed a pointer advanced by aOffset, so translate its
  // answer back into an index into the whole string.
  return relative + aOffset;
}
/**
 * Convenience overload of RFind for a raw |const char*| needle: the pointer
 * is wrapped in an nsTDependentString<char> and the search is delegated to
 * the nsTString<char> overload with the remaining arguments unchanged.
 */
template <typename T>
int32_t
nsTString<T>::RFind(const char* aString, bool aIgnoreCase, int32_t aOffset, int32_t aCount) const
{
  const nsTDependentString<char> needle(aString);
  return RFind(needle, aIgnoreCase, aOffset, aCount);
}
/**
 * nsTString::RFindChar
 *
 * Thin wrapper: hands this string's buffer and length, together with
 * aOffset, aChar and aCount, to nsBufferRoutines<T>::rfind_char and returns
 * whatever that routine reports.
 */
template <typename T>
int32_t
nsTString<T>::RFindChar(char16_t aChar, int32_t aOffset, int32_t aCount) const
{
  const int32_t found =
    nsBufferRoutines<T>::rfind_char(this->mData, this->mLength, aOffset, aChar, aCount);
  return found;
}
/**
 * nsTString::FindCharInSet
 *
 * Searches this string, starting at aOffset, using the global
 * ::FindCharInSet helper with the character set aSet.
 *
 * A negative aOffset is treated as 0. A non-negative aOffset at or beyond the
 * string length yields kNotFound without searching. A hit is returned as an
 * index relative to the beginning of this string.
 */
template <typename T>
int32_t
nsTString<T>::FindCharInSet(const char_type* aSet, int32_t aOffset) const
{
  if (aOffset >= 0 && aOffset >= int32_t(this->mLength)) {
    return kNotFound;
  }

  // Clamp a negative starting index to the beginning of the string.
  const int32_t begin = (aOffset < 0) ? 0 : aOffset;

  const int32_t relative =
    ::FindCharInSet(this->mData + begin, this->mLength - begin, aSet);
  if (relative == kNotFound) {
    return kNotFound;
  }

  // The helper saw a buffer that starts at |begin|; convert back to an index
  // into the full string.
  return relative + begin;
}
/**
 * nsTString::RFindCharInSet
 *
 * Reverse counterpart of FindCharInSet. aOffset is the index of the last
 * character to consider; it is converted into a length (index + 1) and passed
 * to the global ::RFindCharInSet helper together with the start of the buffer
 * and aSet. The helper's return value is returned unchanged.
 *
 * A negative aOffset, or one at or beyond the string length, selects the
 * whole string.
 */
template <typename T>
int32_t
nsTString<T>::RFindCharInSet(const char_type* aSet, int32_t aOffset) const
{
  // We want to pass a "data length" to ::RFindCharInSet.
  //
  // Note the >=: an aOffset equal to mLength is not a valid character index.
  // It used to take the ++aOffset branch and produce a length of mLength + 1,
  // i.e. one element more than the logical contents of the string.
  if (aOffset < 0 || aOffset >= int32_t(this->mLength))
    aOffset = this->mLength;
  else
    ++aOffset;

  return ::RFindCharInSet(this->mData, aOffset, aSet);
}
/**
 * nsTString::Mid
 *
 * Assigns to aResult the part of this string that starts at aStartPos and
 * spans aLengthToCopy characters, and returns aResult's length afterwards.
 *
 * When the request starts at index 0 and asks for at least the full length,
 * the whole string is assigned directly instead of going through Substring.
 */
template <typename T>
typename nsTString<T>::size_type
nsTString<T>::Mid(self_type& aResult, index_type aStartPos, size_type aLengthToCopy) const
{
  const bool wantsEverything = (aStartPos == 0) && (aLengthToCopy >= this->mLength);

  if (!wantsEverything) {
    aResult = Substring(*this, aStartPos, aLengthToCopy);
  } else {
    aResult = *this;
  }

  return aResult.mLength;
}
/**
 * nsTString::SetCharAt
 *
 * Overwrites the character at aIndex with aChar (converted to char_type).
 *
 * Returns false, leaving the string untouched, when aIndex is not smaller
 * than the current length; returns true after the character has been stored.
 * If EnsureMutable() reports failure, AllocFailed() is invoked with the
 * current length before the store.
 */
template <typename T>
bool
nsTString<T>::SetCharAt(char16_t aChar, uint32_t aIndex)
{
  const bool inRange = aIndex < this->mLength;

  if (inRange) {
    // The buffer has to be writable before a single element can be patched.
    if (!this->EnsureMutable()) {
      this->AllocFailed(this->mLength);
    }
    this->mData[aIndex] = static_cast<char_type>(aChar);
  }

  return inRange;
}
/**
* nsTString::StripChars,StripChar,StripWhitespace
*/
/**
 * Infallible StripChars for a set given in the "other" character width
 * (incompatible_char_type). Runs the fallible overload and, if that reports
 * failure, calls AllocFailed() with the current length.
 */
template<typename T>
template<typename Q, typename EnableIfChar16>
void
nsTString<T>::StripChars(const incompatible_char_type* aSet)
{
  const bool stripped = StripChars(aSet, mozilla::fallible);
  if (stripped) {
    return;
  }
  this->AllocFailed(this->mLength);
}
/**
 * Fallible StripChars for a set given in the "other" character width
 * (incompatible_char_type).
 *
 * Returns false if EnsureMutable() fails; in that case nothing is changed by
 * this function. Otherwise nsBufferRoutines<T>::strip_chars processes the
 * buffer, its return value becomes the new length, and true is returned.
 */
template<typename T>
template<typename Q, typename EnableIfChar16>
bool
nsTString<T>::StripChars(const incompatible_char_type* aSet, const fallible_t&)
{
  const bool writable = this->EnsureMutable();

  if (writable) {
    this->mLength = nsBufferRoutines<T>::strip_chars(this->mData, this->mLength, aSet);
  }

  return writable;
}
/**
 * StripChars for a set in this string's own character type: forwards
 * directly to the nsTSubstring<T> implementation.
 */
template<typename T>
void
nsTString<T>::StripChars(const char_type* aSet)
{
  // Explicitly qualified so that the base-class version is the one invoked.
  this->nsTSubstring<T>::StripChars(aSet);
}
/**
 * Infallible StripWhitespace: runs the fallible overload and, if that
 * reports failure, calls AllocFailed() with the current length.
 */
template <typename T>
void
nsTString<T>::StripWhitespace()
{
  if (StripWhitespace(mozilla::fallible)) {
    return;
  }
  this->AllocFailed(this->mLength);
}
/**
 * Fallible StripWhitespace.
 *
 * Returns false if EnsureMutable() fails. Otherwise passes the ASCII
 * whitespace mask (mozilla::ASCIIMask::MaskWhitespace()) to StripTaggedASCII
 * and returns true.
 */
template <typename T>
bool
nsTString<T>::StripWhitespace(const fallible_t&)
{
  const bool writable = this->EnsureMutable();

  if (writable) {
    const auto& whitespace = mozilla::ASCIIMask::MaskWhitespace();
    this->StripTaggedASCII(whitespace);
  }

  return writable;
}
/**
* nsTString::ReplaceChar,ReplaceSubstring
*/
/**
 * Replaces every occurrence of aOldChar in this string with aNewChar.
 *
 * The buffer is made mutable up front, even if no character ends up being
 * replaced; if EnsureMutable() reports failure, AllocFailed() is invoked with
 * the current length.
 */
template <typename T>
void
nsTString<T>::ReplaceChar(char_type aOldChar, char_type aNewChar)
{
  if (!this->EnsureMutable()) { // XXX do this lazily?
    this->AllocFailed(this->mLength);
  }

  char_type* const limit = this->mData + this->mLength;
  for (char_type* cursor = this->mData; cursor != limit; ++cursor) {
    if (*cursor == aOldChar) {
      *cursor = aNewChar;
    }
  }
}
/**
 * Replaces with aNewChar every character that the global ::FindCharInSet
 * helper locates for the set aSet.
 *
 * The buffer is made mutable up front; if EnsureMutable() reports failure,
 * AllocFailed() is invoked with the current length.
 */
template <typename T>
void
nsTString<T>::ReplaceChar(const char_type* aSet, char_type aNewChar)
{
  if (!this->EnsureMutable()) { // XXX do this lazily?
    this->AllocFailed(this->mLength);
  }

  char_type* cursor = this->mData;
  for (uint32_t remaining = this->mLength; remaining != 0; ) {
    const int32_t hit = ::FindCharInSet(cursor, remaining, aSet);
    if (hit == kNotFound) {
      break;
    }

    cursor[hit] = aNewChar;

    // Resume the scan immediately after the character just replaced.
    const int32_t consumed = hit + 1;
    cursor += consumed;
    remaining -= consumed;
  }
}
// Forward declaration only: the definition is not part of the code visible
// alongside it, and none of the functions visible here call it.
// NOTE(review): presumably releases a string buffer according to its data
// flags -- confirm against the definition.
void ReleaseData(void* aData, nsAString::DataFlags aFlags);
/**
 * Infallible ReplaceSubstring taking raw character pointers: both pointers
 * are wrapped in nsTDependentString<T> and the work is delegated to the
 * infallible string-object overload.
 */
template <typename T>
void
nsTString<T>::ReplaceSubstring(const char_type* aTarget,
                               const char_type* aNewValue)
{
  const nsTDependentString<T> target(aTarget);
  const nsTDependentString<T> replacement(aNewValue);
  ReplaceSubstring(target, replacement);
}
/**
 * Fallible ReplaceSubstring taking raw character pointers: both pointers are
 * wrapped in nsTDependentString<T> and the work is delegated to the fallible
 * string-object overload, whose result is returned.
 */
template <typename T>
bool
nsTString<T>::ReplaceSubstring(const char_type* aTarget,
                               const char_type* aNewValue,
                               const fallible_t& aFallible)
{
  const nsTDependentString<T> target(aTarget);
  const nsTDependentString<T> replacement(aNewValue);
  return ReplaceSubstring(target, replacement, aFallible);
}
/**
 * Infallible ReplaceSubstring: runs the fallible overload and, if that
 * reports failure, calls AllocFailed() with an estimate of the size that
 * could not be obtained.
 */
template <typename T>
void
nsTString<T>::ReplaceSubstring(const self_type& aTarget,
                               const self_type& aNewValue)
{
  if (ReplaceSubstring(aTarget, aNewValue, mozilla::fallible)) {
    return;
  }

  // The figure reported here accounts for a single replacement only, so it
  // may be far below the size of the allocation that actually failed when
  // aTarget occurs many times.
  this->AllocFailed(this->mLength + (aNewValue.Length() - aTarget.Length()));
}
/**
 * Fallible core of ReplaceSubstring: replaces, in place, every occurrence of
 * aTarget in this string with aNewValue.
 *
 * The work is done in two passes:
 *  1. Scan the string, recording each stretch of text between matches (the
 *     "non-matching segments") and accumulating the length of the result.
 *  2. Obtain a writable buffer large enough for both the old and the new
 *     contents, then slide the recorded segments to their final positions,
 *     writing a copy of aNewValue in front of each one.
 *
 * Returns true on success. An empty aTarget, or an aTarget that does not
 * occur, counts as success and leaves the string as it was. Returns false
 * when the computed length is not valid (the checked arithmetic failed) or
 * when StartBulkWrite() reports an error.
 */
template <typename T>
bool
nsTString<T>::ReplaceSubstring(const self_type& aTarget,
const self_type& aNewValue,
const fallible_t&)
{
// An empty target can never be "found"; treat it as a successful no-op.
if (aTarget.Length() == 0)
return true;
// Remember all of the non-matching parts.
AutoTArray<Segment, 16> nonMatching;
// |i| is the index in this string at which the next search begins.
uint32_t i = 0;
// Length of the string after all replacements; checked so that an overflow
// is detected (see the isValid() test below) instead of wrapping silently.
mozilla::CheckedUint32 newLength;
while (true)
{
// |r| is relative to |this->mData + i|. The trailing |false| sits in the
// argument position that Find() uses for aIgnoreCase.
int32_t r = FindSubstring(this->mData + i, this->mLength - i, static_cast<const char_type*>(aTarget.Data()), aTarget.Length(), false);
// Length of the non-matching stretch that starts at |i|: up to the match,
// or up to the end of the string if there is no further match.
int32_t until = (r == kNotFound) ? this->mLength - i : r;
nonMatching.AppendElement(Segment(i, until));
newLength += until;
if (r == kNotFound) {
break;
}
// A match was found: it contributes one copy of aNewValue to the result,
// and the next search resumes right after the matched text.
newLength += aNewValue.Length();
i += r + aTarget.Length();
if (i >= this->mLength) {
// Add an auxiliary entry at the end of the list to help as an edge case
// for the algorithms below.
nonMatching.AppendElement(Segment(this->mLength, 0));
break;
}
}
// Give up if the accumulated length could not be represented.
if (!newLength.isValid()) {
return false;
}
// If there's only one non-matching segment, then the target string was not
// found, and there's nothing to do.
if (nonMatching.Length() == 1) {
MOZ_ASSERT(nonMatching[0].mBegin == 0 && nonMatching[0].mLength == this->mLength,
"We should have the correct non-matching segment.");
return true;
}
// Make sure that we can mutate our buffer.
// Note that we always allocate a buffer of at least this->mLength
// characters, because the rest of the algorithm relies on having access to
// all of the original string. In other words, we over-allocate in the
// shrinking case.
uint32_t oldLen = this->mLength;
// NOTE(review): the second argument is presumably the number of existing
// characters to preserve -- confirm against StartBulkWrite's declaration.
mozilla::Result<uint32_t, nsresult> r =
this->StartBulkWrite(XPCOM_MAX(oldLen, newLength.value()), oldLen);
if (r.isErr()) {
return false;
}
// Segment 0 never moves (it starts at index 0 in both the old and the new
// string), which is why both loops below skip index 0.
if (aTarget.Length() >= aNewValue.Length()) {
// In the shrinking case, start filling the buffer from the beginning.
// Going front to back means a segment's destination never lies beyond its
// source, so text that has not been moved yet is not overwritten.
const uint32_t delta = (aTarget.Length() - aNewValue.Length());
for (i = 1; i < nonMatching.Length(); ++i) {
// When we move the i'th non-matching segment into position, we need to
// account for the characters deleted by the previous |i| replacements by
// subtracting |i * delta|.
const char_type* sourceSegmentPtr = this->mData + nonMatching[i].mBegin;
char_type* destinationSegmentPtr = this->mData + nonMatching[i].mBegin - i * delta;
// Write the i'th replacement immediately before the new i'th non-matching
// segment.
char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
aNewValue.Data(), aNewValue.Length());
// Source and destination may overlap, hence move() rather than copy().
char_traits::move(destinationSegmentPtr, sourceSegmentPtr,
nonMatching[i].mLength);
}
} else {
// In the growing case, start filling the buffer from the end.
// Going back to front means a segment's destination never lies before its
// source, so text that has not been moved yet is not overwritten.
const uint32_t delta = (aNewValue.Length() - aTarget.Length());
for (i = nonMatching.Length() - 1; i > 0; --i) {
// When we move the i'th non-matching segment into position, we need to
// account for the characters added by the previous |i| replacements by
// adding |i * delta|.
const char_type* sourceSegmentPtr = this->mData + nonMatching[i].mBegin;
char_type* destinationSegmentPtr = this->mData + nonMatching[i].mBegin + i * delta;
// Source and destination may overlap, hence move() rather than copy().
char_traits::move(destinationSegmentPtr, sourceSegmentPtr,
nonMatching[i].mLength);
// Write the i'th replacement immediately before the new i'th non-matching
// segment.
char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
aNewValue.Data(), aNewValue.Length());
}
}
// Adjust the length and make sure the string is null terminated.
this->FinishBulkWrite(newLength.value());
return true;
}
/**
 * nsTString::Trim
 *
 * Removes from the ends of this string the characters that FindChar1 locates
 * in aSet.
 *
 * aSet           null-terminated set of 8-bit characters to remove; a null
 *                pointer makes the call a no-op
 * aTrimLeading   trim at the front of the string
 * aTrimTrailing  trim at the back of the string
 * aIgnoreQuotes  if the string is longer than two characters and begins and
 *                ends with the same quote character (' or "), leave those
 *                two quotes in place and trim the text between them
 */
template <typename T>
void
nsTString<T>::Trim(const char* aSet, bool aTrimLeading, bool aTrimTrailing, bool aIgnoreQuotes)
{
  // the old implementation worried about aSet being null :-/
  if (!aSet)
    return;

  // [start, end) is the range of characters that may be trimmed.
  char_type* start = this->mData;
  char_type* end = this->mData + this->mLength;

  // skip over quotes if requested
  if (aIgnoreQuotes && this->mLength > 2 && this->mData[0] == this->mData[this->mLength - 1] &&
      (this->mData[0] == '\'' || this->mData[0] == '"'))
  {
    ++start;
    --end;
  }

  uint32_t setLen = nsCharTraits<char>::length(aSet);

  if (aTrimLeading)
  {
    uint32_t cutStart = start - this->mData;
    uint32_t cutLength = 0;

    // walk forward from start to end
    for (; start != end; ++start, ++cutLength)
    {
      int32_t pos = FindChar1(aSet, setLen, 0, *start, setLen);
      if (pos == kNotFound)
        break;
    }

    if (cutLength)
    {
      this->Cut(cutStart, cutLength);

      // reset iterators: mData and mLength are re-read after Cut(). cutStart
      // is the number of characters skipped at the front (0, or 1 for a
      // quote), and the same number is skipped at the back.
      start = this->mData + cutStart;
      end = this->mData + this->mLength - cutStart;
    }
  }

  if (aTrimTrailing)
  {
    uint32_t cutEnd = end - this->mData;
    uint32_t cutLength = 0;

    // walk backward from end to start. |end| always stays one past the
    // character under inspection, so it is never moved below |start|; in
    // particular no pointer before the beginning of the buffer is formed
    // when the range is empty or consists only of characters to trim.
    for (; end > start; --end, ++cutLength)
    {
      int32_t pos = FindChar1(aSet, setLen, 0, *(end - 1), setLen);
      if (pos == kNotFound)
        break;
    }

    if (cutLength)
      this->Cut(cutEnd - cutLength, cutLength);
  }
}
/**
 * nsTString::CompressWhitespace.
 *
 * Rewrites the string in place so that every run of whitespace (as defined
 * by mozilla::ASCIIMask::MaskWhitespace()) becomes a single ' '.
 *
 * aTrimLeading   drop a whitespace run at the very beginning entirely
 * aTrimTrailing  drop the single space left over from a whitespace run at
 *                the very end
 *
 * An empty string is returned untouched. If EnsureMutable() reports failure,
 * AllocFailed() is invoked with the current length.
 */
template <typename T>
void
nsTString<T>::CompressWhitespace(bool aTrimLeading, bool aTrimTrailing)
{
  // Nothing to compress in an empty string.
  if (this->mLength == 0) {
    return;
  }

  if (!this->EnsureMutable()) {
    this->AllocFailed(this->mLength);
  }

  const ASCIIMaskArray& mask = mozilla::ASCIIMask::MaskWhitespace();

  char_type* writePos = this->mData;
  const char_type* const stop = this->mData + this->mLength;

  // True while the most recent input was whitespace that needs no further
  // output. Starting out true is what swallows leading whitespace. This
  // assumes that whitespace is all standard 7-bit ASCII.
  bool inRun = aTrimLeading;

  for (const char_type* readPos = this->mData; readPos < stop; ++readPos) {
    const uint32_t ch = *readPos;

    if (!mozilla::ASCIIMask::IsMasked(mask, ch)) {
      // Ordinary character: copy it through and end any whitespace run.
      *writePos++ = ch;
      inRun = false;
      continue;
    }

    // Whitespace: only the first one of a run is emitted, always as ' '.
    if (!inRun) {
      *writePos++ = ' ';
      inRun = true;
    }
  }

  // The output ended inside a whitespace run; take back the space written
  // for it if trailing whitespace is to be trimmed.
  if (aTrimTrailing && inRun && writePos > this->mData) {
    --writePos;
  }

  *writePos = char_type(0); // add the null
  this->mLength = writePos - this->mData;
}