Bug 1737814 - Part 3: Simplify parts data structure. r=platform-i18n-reviewers,anba,gregtatum

Differential Revision: https://phabricator.services.mozilla.com/D129563
This commit is contained in:
Yoshi Cheng-Hao Huang 2021-11-01 14:23:23 +00:00
parent 2349231e66
commit b4c3f94c5f
4 changed files with 65 additions and 29 deletions

View file

@ -131,19 +131,32 @@ TEST(IntlListFormat, FormatToParts)
mozilla::intl::ListFormat::PartVector parts;
ASSERT_TRUE(lf->FormatToParts(list, buf16, parts).isOk());
std::u16string_view strView = buf16.get_string_view();
ASSERT_EQ(strView, u"Alice, Bob, and Charlie");
// 3 elements, and 2 literals.
ASSERT_EQ((parts.length()), (5u));
ASSERT_EQ(parts[0], (ListFormat::Part{ListFormat::PartType::Element,
MakeStringSpan(u"Alice")}));
ASSERT_EQ(parts[1], (ListFormat::Part{ListFormat::PartType::Literal,
MakeStringSpan(u", ")}));
ASSERT_EQ(parts[2], (ListFormat::Part{ListFormat::PartType::Element,
MakeStringSpan(u"Bob")}));
ASSERT_EQ(parts[3], (ListFormat::Part{ListFormat::PartType::Literal,
MakeStringSpan(u", and ")}));
ASSERT_EQ(parts[4], (ListFormat::Part{ListFormat::PartType::Element,
MakeStringSpan(u"Charlie")}));
auto getSubStringView = [strView, &parts](size_t index) {
size_t pos = index == 0 ? 0 : parts[index - 1].second;
size_t count = parts[index].second - pos;
return strView.substr(pos, count);
};
ASSERT_EQ(parts[0].first, ListFormat::PartType::Element);
ASSERT_EQ(getSubStringView(0), u"Alice");
ASSERT_EQ(parts[1].first, ListFormat::PartType::Literal);
ASSERT_EQ(getSubStringView(1), u", ");
ASSERT_EQ(parts[2].first, ListFormat::PartType::Element);
ASSERT_EQ(getSubStringView(2), u"Bob");
ASSERT_EQ(parts[3].first, ListFormat::PartType::Literal);
ASSERT_EQ(getSubStringView(3), u", and ");
ASSERT_EQ(parts[4].first, ListFormat::PartType::Element);
ASSERT_EQ(getSubStringView(4), u"Charlie");
}
} // namespace mozilla::intl

View file

@ -55,13 +55,13 @@ ListFormat::~ListFormat() {
return ULISTFMT_WIDTH_WIDE;
}
ICUResult ListFormat::FormattedToParts(
const UFormattedValue* formattedValue,
mozilla::Span<const char16_t> formattedSpan, PartVector& parts) {
ICUResult ListFormat::FormattedToParts(const UFormattedValue* formattedValue,
size_t formattedSize,
PartVector& parts) {
size_t lastEndIndex = 0;
auto AppendPart = [&](PartType type, size_t beginIndex, size_t endIndex) {
if (!parts.emplaceBack(type, formattedSpan.FromTo(beginIndex, endIndex))) {
auto AppendPart = [&](PartType type, size_t endIndex) {
if (!parts.emplaceBack(type, endIndex)) {
return false;
}
@ -110,19 +110,19 @@ ICUResult ListFormat::FormattedToParts(
"finish as expected");
if (lastEndIndex < beginIndex) {
if (!AppendPart(PartType::Literal, lastEndIndex, beginIndex)) {
if (!AppendPart(PartType::Literal, beginIndex)) {
return Err(ICUError::InternalError);
}
}
if (!AppendPart(PartType::Element, beginIndex, endIndex)) {
if (!AppendPart(PartType::Element, endIndex)) {
return Err(ICUError::InternalError);
}
}
// Append any final literal.
if (lastEndIndex < formattedSpan.size()) {
if (!AppendPart(PartType::Literal, lastEndIndex, formattedSpan.size())) {
if (lastEndIndex < formattedSize) {
if (!AppendPart(PartType::Literal, formattedSize)) {
return Err(ICUError::InternalError);
}
}

View file

@ -97,24 +97,37 @@ class ListFormat final {
/**
* The corresponding list of parts according to the effective locale and the
* formatting options of ListFormat.
* Each part has a [[Type]] field, which must be "element" or "literal".
* Each part has a [[Type]] field, which must be "element" or "literal", and a
* [[Value]] field.
*
* https://tc39.es/ecma402/#sec-createpartsfromlist
* To store Part more efficiently, the [[Value]] string is not stored in the
* Part itself. Instead, each Part stores the end index of its [[Value]] in the
* buffer (which is passed to ListFormat::FormatToParts()). The begin index of
* a [[Value]] is the end index of the previous part, or 0 for the first part.
*
* Buffer
* 0 i j
* +---------------+---------------+---------------+
* | Part[0].Value | Part[1].Value | Part[2].Value | ....
* +---------------+---------------+---------------+
*
* Part[0].second is i, so Part[0].Value is stored in Buffer[0, i).
* Part[1].second is j, so Part[1].Value is stored in Buffer[i, j).
*
* See https://tc39.es/ecma402/#sec-createpartsfromlist
*/
enum class PartType {
Element,
Literal,
};
using Part = std::pair<PartType, mozilla::Span<const char16_t>>;
// The second field is the (exclusive) end index of this part's value in the
// buffer; the value begins at the previous part's end index, or at 0.
using Part = std::pair<PartType, size_t>;
using PartVector = mozilla::Vector<Part, DEFAULT_LIST_LENGTH>;
/**
* Format the list to a list of parts, and store the formatted result of
* UTF-16 string into buffer, and formatted parts into the vector 'parts'.
*
* The PartVector contains mozilla::Span which point to memory owned by the
* provided buffer.
*
* See:
* https://tc39.es/ecma402/#sec-Intl.ListFormat.prototype.formatToParts
* https://tc39.es/ecma402/#sec-formatlisttoparts
@ -151,7 +164,7 @@ class ListFormat final {
if (!value) {
return Err(ICUError::InternalError);
}
return FormattedToParts(value, {buffer.data(), buffer.length()}, parts);
return FormattedToParts(value, buffer.length(), parts);
}
private:
@ -200,8 +213,7 @@ class ListFormat final {
ulistfmt_resultAsValue, ulistfmt_closeResult>;
ICUResult FormattedToParts(const UFormattedValue* formattedValue,
mozilla::Span<const char16_t> formattedSpan,
PartVector& parts);
size_t formattedSize, PartVector& parts);
static UListFormatterType ToUListFormatterType(Type type);
static UListFormatterWidth ToUListFormatterWidth(Style style);

View file

@ -263,6 +263,11 @@ static bool FormatListToParts(JSContext* cx, mozilla::intl::ListFormat* lf,
return false;
}
RootedString overallResult(cx, buffer.toString(cx));
if (!overallResult) {
return false;
}
RootedArrayObject partsArray(cx,
NewDenseFullyAllocatedArray(cx, parts.length()));
if (!partsArray) {
@ -274,6 +279,7 @@ static bool FormatListToParts(JSContext* cx, mozilla::intl::ListFormat* lf,
RootedValue val(cx);
size_t index = 0;
size_t beginIndex = 0;
for (const mozilla::intl::ListFormat::Part& part : parts) {
singlePart = NewPlainObject(cx);
if (!singlePart) {
@ -290,7 +296,9 @@ static bool FormatListToParts(JSContext* cx, mozilla::intl::ListFormat* lf,
return false;
}
JSString* partStr = NewStringCopy<CanGC>(cx, part.second);
MOZ_ASSERT(part.second > beginIndex);
JSLinearString* partStr = NewDependentString(cx, overallResult, beginIndex,
part.second - beginIndex);
if (!partStr) {
return false;
}
@ -299,9 +307,12 @@ static bool FormatListToParts(JSContext* cx, mozilla::intl::ListFormat* lf,
return false;
}
beginIndex = part.second;
partsArray->initDenseElement(index++, ObjectValue(*singlePart));
}
MOZ_ASSERT(index == parts.length());
MOZ_ASSERT(beginIndex == buffer.length());
result.setObject(*partsArray);
return true;