Backed out 3 changesets (bug 1719554) for causing bustages complaining about gfxTextRun.cpp.

Backed out changeset 6181e40d4da1 (bug 1719554)
Backed out changeset c261ede6ae81 (bug 1719554)
Backed out changeset 221ec418475c (bug 1719554)
This commit is contained in:
Butkovits Atila 2021-12-04 00:58:15 +02:00
parent 77a737b1a4
commit 56c46d06a1
41 changed files with 479 additions and 587 deletions

View file

@ -35,9 +35,9 @@ layout/style/nsStyleStructList.h
gfx/gl/GLConsts.h
gfx/webrender_bindings/webrender_ffi_generated.h
dom/webgpu/ffi/wgpu_ffi_generated.h
intl/components/src/UnicodeScriptCodes.h
intl/unicharutil/util/nsSpecialCasingData.cpp
intl/unicharutil/util/nsUnicodePropertyData.cpp
intl/unicharutil/util/nsUnicodeScriptCodes.h
media/mp4parse-rust/mp4parse.h
security/manager/ssl/StaticHPKPins.h
widget/gtk/wayland/gtk-primary-selection-client-protocol.h

View file

@ -215,7 +215,6 @@
#include "mozilla/dom/Element.h"
#include "mozilla/dom/HTMLSlotElement.h"
#include "mozilla/dom/ShadowRoot.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "nsTextFragment.h"
#include "nsAttrValue.h"
@ -304,12 +303,12 @@ static bool DoesNotAffectDirectionOfAncestors(const Element* aElement) {
* Returns the directionality of a Unicode character
*/
static Directionality GetDirectionFromChar(uint32_t ch) {
switch (intl::UnicodeProperties::GetBidiClass(ch)) {
case intl::BidiClass::RightToLeft:
case intl::BidiClass::RightToLeftArabic:
switch (mozilla::unicode::GetBidiCat(ch)) {
case eCharType_RightToLeft:
case eCharType_RightToLeftArabic:
return eDir_RTL;
case intl::BidiClass::LeftToRight:
case eCharType_LeftToRight:
return eDir_LTR;
default:

View file

@ -22,6 +22,7 @@
#include "nsContentUtils.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"
#include "nsCRT.h"
#include "mozilla/Casting.h"
#include "mozilla/EditorUtils.h"
@ -30,8 +31,6 @@
#include "mozilla/dom/HTMLBRElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
#include "mozilla/Span.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_converter.h"
@ -1804,7 +1803,7 @@ int32_t GetUnicharWidth(char32_t aCh) {
return 1;
}
return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
return unicode::IsEastAsianWidthFW(aCh) ? 2 : 1;
}
int32_t GetUnicharStringWidth(Span<const char16_t> aString) {

View file

@ -69,9 +69,10 @@ gfxCoreTextShaper::~gfxCoreTextShaper() {
}
}
static bool IsBuggyIndicScript(intl::Script aScript) {
return aScript == intl::Script::BENGALI || aScript == intl::Script::KANNADA ||
aScript == intl::Script::ORIYA || aScript == intl::Script::KHMER;
static bool IsBuggyIndicScript(unicode::Script aScript) {
return aScript == unicode::Script::BENGALI ||
aScript == unicode::Script::KANNADA ||
aScript == unicode::Script::ORIYA || aScript == unicode::Script::KHMER;
}
bool gfxCoreTextShaper::ShapeText(DrawTarget* aDrawTarget,

View file

@ -1105,10 +1105,10 @@ static void HasLookupRuleWithGlyph(hb_face_t* aFace, hb_tag_t aTableTag,
hb_set_destroy(otherLookups);
}
nsTHashMap<nsUint32HashKey, intl::Script>* gfxFont::sScriptTagToCode = nullptr;
nsTHashMap<nsUint32HashKey, Script>* gfxFont::sScriptTagToCode = nullptr;
nsTHashSet<uint32_t>* gfxFont::sDefaultFeatures = nullptr;
static inline bool HasSubstitution(uint32_t* aBitVector, intl::Script aScript) {
static inline bool HasSubstitution(uint32_t* aBitVector, Script aScript) {
return (aBitVector[static_cast<uint32_t>(aScript) >> 5] &
(1 << (static_cast<uint32_t>(aScript) & 0x1f))) != 0;
}
@ -1165,9 +1165,9 @@ void gfxFont::CheckForFeaturesInvolvingSpace() {
// Ensure that we don't try to look at script codes beyond what the
// current version of ICU (at runtime -- in case of system ICU)
// knows about.
Script scriptCount = Script(
std::min<int>(intl::UnicodeProperties::GetMaxNumberOfScripts() + 1,
int(Script::NUM_SCRIPT_CODES)));
Script scriptCount =
Script(std::min<int>(u_getIntPropertyMaxValue(UCHAR_SCRIPT) + 1,
int(Script::NUM_SCRIPT_CODES)));
for (Script s = Script::ARABIC; s < scriptCount;
s = Script(static_cast<int>(s) + 1)) {
hb_script_t script = hb_script_t(GetScriptTagForCode(s));

View file

@ -24,7 +24,6 @@
#include "mozilla/UniquePtr.h"
#include "mozilla/gfx/MatrixFwd.h"
#include "mozilla/gfx/Point.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsCOMPtr.h"
#include "nsColor.h"
#include "nsTHashMap.h"
@ -38,6 +37,7 @@
#include "nsString.h"
#include "nsTArray.h"
#include "nsTHashtable.h"
#include "nsUnicodeScriptCodes.h"
#include "nscore.h"
// Only required for function bodies
@ -672,7 +672,7 @@ class gfxTextRunFactory {
class gfxFontShaper {
public:
typedef mozilla::gfx::DrawTarget DrawTarget;
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
enum class RoundingFlags : uint8_t { kRoundX = 0x01, kRoundY = 0x02 };
@ -731,7 +731,7 @@ MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(gfxFontShaper::RoundingFlags)
*/
class gfxShapedText {
public:
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
gfxShapedText(uint32_t aLength, mozilla::gfx::ShapedTextFlags aFlags,
uint16_t aAppUnitsPerDevUnit)
@ -1261,7 +1261,7 @@ class gfxShapedText {
*/
class gfxShapedWord final : public gfxShapedText {
public:
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
// Create a ShapedWord that can hold glyphs for aLength characters,
// with mCharacterGlyphs sized appropriately.
@ -1426,7 +1426,7 @@ class gfxFont {
protected:
using DrawTarget = mozilla::gfx::DrawTarget;
using Script = mozilla::intl::Script;
using Script = mozilla::unicode::Script;
using SVGContextPaint = mozilla::SVGContextPaint;
using RoundingFlags = gfxFontShaper::RoundingFlags;

View file

@ -814,7 +814,7 @@ tainted_boolean_hint gfxFontEntry::HasGraphiteSpaceContextuals() {
#define FEATURE_SCRIPT_MASK 0x000000ff // script index replaces low byte of tag
static_assert(int(intl::Script::NUM_SCRIPT_CODES) <= FEATURE_SCRIPT_MASK,
static_assert(int(Script::NUM_SCRIPT_CODES) <= FEATURE_SCRIPT_MASK,
"Too many script codes");
// high-order three bytes of tag with script in low-order byte
@ -1780,8 +1780,7 @@ void gfxFontFamily::FindFontForChar(GlobalFontMatch* aMatchData) {
LogModule* log = gfxPlatform::GetLog(eGfxLog_textrun);
if (MOZ_UNLIKELY(MOZ_LOG_TEST(log, LogLevel::Debug))) {
intl::Script script =
intl::UnicodeProperties::GetScriptCode(aMatchData->mCh);
Script script = GetScriptCode(aMatchData->mCh);
MOZ_LOG(log, LogLevel::Debug,
("(textrun-systemfallback-fonts) char: u+%6.6x "
"script: %d match: [%s]\n",

View file

@ -23,13 +23,13 @@
#include "mozilla/RefPtr.h"
#include "mozilla/TypedEnumBits.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsTHashMap.h"
#include "nsDebug.h"
#include "nsHashKeys.h"
#include "nsISupports.h"
#include "nsStringFwd.h"
#include "nsTArray.h"
#include "nsUnicodeScriptCodes.h"
#include "nscore.h"
class FontInfoData;
@ -131,7 +131,7 @@ struct gfxFontFeatureInfo {
class gfxFontEntry {
public:
typedef mozilla::gfx::DrawTarget DrawTarget;
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
typedef mozilla::FontWeight FontWeight;
typedef mozilla::FontSlantStyle FontSlantStyle;
typedef mozilla::FontStretch FontStretch;

View file

@ -11,9 +11,8 @@
#include "gfxTextRun.h"
#include "mozilla/Sprintf.h"
#include "mozilla/intl/String.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsUnicodeProperties.h"
#include "nsUnicodeScriptCodes.h"
#include "harfbuzz/hb.h"
#include "harfbuzz/hb-ot.h"
@ -982,7 +981,7 @@ static hb_position_t HBGetHKerning(hb_font_t* font, void* font_data,
static hb_codepoint_t HBGetMirroring(hb_unicode_funcs_t* ufuncs,
hb_codepoint_t aCh, void* user_data) {
return intl::UnicodeProperties::CharMirror(aCh);
return GetMirroredChar(aCh);
}
static hb_unicode_general_category_t HBGetGeneralCategory(
@ -992,20 +991,18 @@ static hb_unicode_general_category_t HBGetGeneralCategory(
static hb_script_t HBGetScript(hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh,
void* user_data) {
return hb_script_t(
GetScriptTagForCode(intl::UnicodeProperties::GetScriptCode(aCh)));
return hb_script_t(GetScriptTagForCode(GetScriptCode(aCh)));
}
static hb_unicode_combining_class_t HBGetCombiningClass(
hb_unicode_funcs_t* ufuncs, hb_codepoint_t aCh, void* user_data) {
return hb_unicode_combining_class_t(
intl::UnicodeProperties::GetCombiningClass(aCh));
return hb_unicode_combining_class_t(GetCombiningClass(aCh));
}
static hb_bool_t HBUnicodeCompose(hb_unicode_funcs_t* ufuncs, hb_codepoint_t a,
hb_codepoint_t b, hb_codepoint_t* ab,
void* user_data) {
char32_t ch = intl::String::ComposePairNFC(a, b);
char32_t ch = mozilla::intl::String::ComposePairNFC(a, b);
if (ch > 0) {
*ab = ch;
return true;
@ -1028,7 +1025,7 @@ static hb_bool_t HBUnicodeDecompose(hb_unicode_funcs_t* ufuncs,
#endif
char32_t decomp[2] = {0};
if (intl::String::DecomposeRawNFD(ab, decomp)) {
if (mozilla::intl::String::DecomposeRawNFD(ab, decomp)) {
if (decomp[1] || decomp[0] != ab) {
*a = decomp[0];
*b = decomp[1];

View file

@ -8,10 +8,10 @@
#include "mozilla/FontPropertyTypes.h"
#include "mozilla/gfx/Types.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsTArray.h"
#include "nsString.h"
#include "nsCOMPtr.h"
#include "nsUnicodeScriptCodes.h"
#include "gfxTelemetry.h"
#include "gfxTypes.h"
@ -186,7 +186,7 @@ class gfxPlatform : public mozilla::layers::MemoryPressureListener {
typedef mozilla::gfx::DrawTarget DrawTarget;
typedef mozilla::gfx::IntSize IntSize;
typedef mozilla::gfx::SourceSurface SourceSurface;
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
/**
* Return a pointer to the current active platform.

View file

@ -975,7 +975,7 @@ gfxFont* gfxPlatformFontList::SystemFindFontForChar(
LogModule* log = gfxPlatform::GetLog(eGfxLog_textrun);
if (MOZ_UNLIKELY(MOZ_LOG_TEST(log, LogLevel::Warning))) {
Script script = intl::UnicodeProperties::GetScriptCode(aCh);
Script script = mozilla::unicode::GetScriptCode(aCh);
MOZ_LOG(log, LogLevel::Warning,
("(textrun-systemfallback-%s) char: u+%6.6x "
"script: %d match: [%s]"

View file

@ -162,7 +162,7 @@ class gfxPlatformFontList : public gfxFontInfoLoader {
typedef mozilla::StretchRange StretchRange;
typedef mozilla::SlantStyleRange SlantStyleRange;
typedef mozilla::WeightRange WeightRange;
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
// For font family lists loaded from user preferences (prefs such as
// font.name-list.<generic>.<langGroup>) that map CSS generics to

View file

@ -48,12 +48,11 @@
*/
#include "gfxScriptItemizer.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsCharTraits.h"
#include "mozilla/intl/Script.h"
#include "nsUnicodeProperties.h"
#include "nsCharTraits.h"
#include "harfbuzz/hb.h"
using namespace mozilla::intl;
using namespace mozilla::unicode;
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
@ -117,8 +116,7 @@ static inline bool SameScript(Script runScript, Script currCharScript,
uint32_t aCurrCh) {
return CanMergeWithContext(runScript) ||
CanMergeWithContext(currCharScript) || currCharScript == runScript ||
IsClusterExtender(aCurrCh) ||
UnicodeProperties::HasScript(aCurrCh, runScript);
IsClusterExtender(aCurrCh) || HasScript(aCurrCh, runScript);
}
gfxScriptItemizer::gfxScriptItemizer(const char16_t* src, uint32_t length)
@ -164,7 +162,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
// if the character has script=COMMON, otherwise we don't care.
uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
sc = UnicodeProperties::GetScriptCode(ch);
sc = GetScriptCode(ch);
if (sc == Script::COMMON) {
/*
* Paired character handling:
@ -179,12 +177,12 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
*/
gc = GetGeneralCategory(ch);
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
uint32_t endPairChar = UnicodeProperties::CharMirror(ch);
uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
if (endPairChar != ch) {
push(endPairChar, scriptCode);
}
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
UnicodeProperties::IsMirrored(ch)) {
HasMirroredChar(ch)) {
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
pop();
}
@ -206,8 +204,8 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
} else if (fallbackScript == Script::UNKNOWN) {
// See if the character has a ScriptExtensions property we can
// store for use in the event the run remains unresolved.
UnicodeProperties::ScriptExtensionVector extensions;
auto extResult = UnicodeProperties::GetExtensions(ch, extensions);
mozilla::intl::ScriptExtensionVector extensions;
auto extResult = mozilla::intl::Script::GetExtensions(ch, extensions);
if (extResult.isOk()) {
Script ext = Script(extensions[0]);
if (!CanMergeWithContext(ext)) {
@ -222,7 +220,7 @@ bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
* pop the matching open character from the stack
*/
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
UnicodeProperties::IsMirrored(ch)) {
HasMirroredChar(ch)) {
pop();
}
} else {

View file

@ -51,13 +51,13 @@
#define GFX_SCRIPTITEMIZER_H
#include <stdint.h>
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsUnicodeScriptCodes.h"
#define PAREN_STACK_DEPTH 32
class gfxScriptItemizer {
public:
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
gfxScriptItemizer(const char16_t* src, uint32_t length);

View file

@ -19,7 +19,6 @@
#include "gfxUserFontSet.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/RefPtr.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsPoint.h"
#include "nsString.h"
#include "nsTArray.h"
@ -27,6 +26,7 @@
#include "nsTextFrameUtils.h"
#include "DrawMode.h"
#include "harfbuzz/hb.h"
#include "nsUnicodeScriptCodes.h"
#include "nsColor.h"
#include "nsFrameList.h"
#include "X11UndefineNone.h"
@ -901,7 +901,7 @@ class gfxTextRun : public gfxShapedText {
class gfxFontGroup final : public gfxTextRunFactory {
public:
typedef mozilla::intl::Script Script;
typedef mozilla::unicode::Script Script;
typedef gfxShapedText::CompressedGlyph CompressedGlyph;
static void
@ -1508,7 +1508,7 @@ class gfxMissingFontRecorder {
}
// record this script code in our mMissingFonts bitset
void RecordScript(mozilla::intl::Script aScriptCode) {
void RecordScript(mozilla::unicode::Script aScriptCode) {
mMissingFonts[static_cast<uint32_t>(aScriptCode) >> 5] |=
(1 << (static_cast<uint32_t>(aScriptCode) & 0x1f));
}
@ -1524,7 +1524,8 @@ class gfxMissingFontRecorder {
private:
// Number of 32-bit words needed for the missing-script flags
static const uint32_t kNumScriptBitsWords =
((static_cast<int>(mozilla::intl::Script::NUM_SCRIPT_CODES) + 31) / 32);
((static_cast<int>(mozilla::unicode::Script::NUM_SCRIPT_CODES) + 31) /
32);
uint32_t mMissingFonts[kNumScriptBitsWords];
};

View file

@ -3,60 +3,60 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/intl/Script.h"
#include "nsUnicodeScriptCodes.h"
namespace mozilla::intl {
TEST(IntlScript, GetExtensions)
{
UnicodeProperties::ScriptExtensionVector extensions;
ScriptExtensionVector extensions;
// 0x0000..0x0040 are Common.
for (char32_t ch = 0; ch < 0x0041; ch++) {
ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk());
ASSERT_TRUE(Script::GetExtensions(ch, extensions).isOk());
ASSERT_EQ(extensions.length(), 1u);
ASSERT_EQ(Script(extensions[0]), Script::COMMON);
ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::COMMON);
}
// 0x0300..0x0341 are Inherited.
for (char32_t ch = 0x300; ch < 0x0341; ch++) {
ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk());
ASSERT_TRUE(Script::GetExtensions(ch, extensions).isOk());
ASSERT_EQ(extensions.length(), 1u);
ASSERT_EQ(Script(extensions[0]), Script::INHERITED);
ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::INHERITED);
}
// 0x1cf7's script code is Common, but its script extension is Beng.
ASSERT_TRUE(UnicodeProperties::GetExtensions(0x1cf7, extensions).isOk());
ASSERT_TRUE(Script::GetExtensions(0x1cf7, extensions).isOk());
ASSERT_EQ(extensions.length(), 1u);
ASSERT_EQ(Script(extensions[0]), Script::BENGALI);
ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::BENGALI);
// ؿ
// https://unicode-table.com/en/063F/
// This character doesn't have any script extension, so the script code is
// returned.
ASSERT_TRUE(UnicodeProperties::GetExtensions(0x063f, extensions).isOk());
ASSERT_TRUE(Script::GetExtensions(0x063f, extensions).isOk());
ASSERT_EQ(extensions.length(), 1u);
ASSERT_EQ(Script(extensions[0]), Script::ARABIC);
ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::ARABIC);
// 0xff65 is the unicode character '・', see https://unicode-table.com/en/FF65/
// Halfwidth Katakana Middle Dot.
ASSERT_TRUE(UnicodeProperties::GetExtensions(0xff65, extensions).isOk());
ASSERT_TRUE(Script::GetExtensions(0xff65, extensions).isOk());
// 0xff65 should have the following script extensions:
// Bopo Hang Hani Hira Kana Yiii.
ASSERT_EQ(extensions.length(), 6u);
ASSERT_EQ(Script(extensions[0]), Script::BOPOMOFO);
ASSERT_EQ(Script(extensions[1]), Script::HAN);
ASSERT_EQ(Script(extensions[2]), Script::HANGUL);
ASSERT_EQ(Script(extensions[3]), Script::HIRAGANA);
ASSERT_EQ(Script(extensions[4]), Script::KATAKANA);
ASSERT_EQ(Script(extensions[5]), Script::YI);
ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::BOPOMOFO);
ASSERT_EQ(unicode::Script(extensions[1]), unicode::Script::HAN);
ASSERT_EQ(unicode::Script(extensions[2]), unicode::Script::HANGUL);
ASSERT_EQ(unicode::Script(extensions[3]), unicode::Script::HIRAGANA);
ASSERT_EQ(unicode::Script(extensions[4]), unicode::Script::KATAKANA);
ASSERT_EQ(unicode::Script(extensions[5]), unicode::Script::YI);
// The max code point is 0x10ffff, so 0x110000 should be invalid.
// Script::UNKNOWN should be returned for an invalid code point.
ASSERT_TRUE(UnicodeProperties::GetExtensions(0x110000, extensions).isOk());
ASSERT_TRUE(Script::GetExtensions(0x110000, extensions).isOk());
ASSERT_EQ(extensions.length(), 1u);
ASSERT_EQ(Script(extensions[0]), Script::UNKNOWN);
ASSERT_EQ(unicode::Script(extensions[0]), unicode::Script::UNKNOWN);
}
} // namespace mozilla::intl

View file

@ -5,7 +5,6 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
EXPORTS.mozilla.intl = [
"src/Bidi.h",
"src/BidiClass.h",
"src/BidiEmbeddingLevel.h",
"src/Calendar.h",
"src/Collator.h",
@ -28,10 +27,9 @@ EXPORTS.mozilla.intl = [
"src/NumberRangeFormat.h",
"src/PluralRules.h",
"src/RelativeTimeFormat.h",
"src/Script.h",
"src/String.h",
"src/TimeZone.h",
"src/UnicodeProperties.h",
"src/UnicodeScriptCodes.h",
]
UNIFIED_SOURCES += [
@ -58,6 +56,7 @@ UNIFIED_SOURCES += [
"src/NumberRangeFormat.cpp",
"src/PluralRules.cpp",
"src/RelativeTimeFormat.cpp",
"src/Script.cpp",
"src/String.cpp",
"src/TimeZone.cpp",
]

View file

@ -1,47 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_BidiClass_h_
#define intl_components_BidiClass_h_
namespace mozilla::intl {
/**
* Bidirectional character classes.
*
* Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
* section BIDIRECTIONAL PROPERTIES
* for the detailed definition of the following categories
*
* The values here must match the equivalents in %bidicategorycode in
* mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
* and must also match the values used by ICU's UCharDirection.
* Do not reorder or renumber the enumerators.
*
* NOTE(review): the underlying type is uint8_t, but no <stdint.h>/<cstdint>
* include is visible in this header -- it appears to rely on the includer
* having already declared it; confirm.
*/
enum class BidiClass : uint8_t {
LeftToRight = 0,
RightToLeft = 1,
EuropeanNumber = 2,
EuropeanNumberSeparator = 3,
EuropeanNumberTerminator = 4,
ArabicNumber = 5,
CommonNumberSeparator = 6,
BlockSeparator = 7,
SegmentSeparator = 8,
WhiteSpaceNeutral = 9,
OtherNeutral = 10,
LeftToRightEmbedding = 11,
LeftToRightOverride = 12,
RightToLeftArabic = 13,
RightToLeftEmbedding = 14,
RightToLeftOverride = 15,
PopDirectionalFormat = 16,
DirNonSpacingMark = 17,
BoundaryNeutral = 18,
FirstStrongIsolate = 19,
LeftToRightIsolate = 20,
RightToLeftIsolate = 21,
PopDirectionalIsolate = 22,
// Not a real class: implicitly 23, i.e. the number of classes listed above.
BidiClassCount
};
} // namespace mozilla::intl
#endif

View file

@ -0,0 +1,40 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/intl/Script.h"
#include "unicode/uscript.h"
namespace mozilla::intl {
// static
ICUResult Script::GetExtensions(char32_t aCodePoint,
                                ScriptExtensionVector& aExtensions) {
  // Always hand back a vector that holds nothing but this call's results,
  // including on the early-return error paths below.
  aExtensions.clear();

  // ICU writes 4-byte UScriptCode values, which is wider than the elements of
  // aExtensions, so uscript_getScriptExtensions cannot fill the vector
  // directly. Collect the codes in a stack buffer and copy them over.
  UErrorCode status = U_ZERO_ERROR;
  UScriptCode icuCodes[kMaxScripts];
  const int32_t count = uscript_getScriptExtensions(
      static_cast<UChar32>(aCodePoint), icuCodes, kMaxScripts, &status);

  if (U_FAILURE(status)) {
    // kMaxScripts is expected to be large enough for the maximum number of
    // script extensions, so a buffer overflow here indicates a logic error.
    MOZ_DIAGNOSTIC_ASSERT(status != U_BUFFER_OVERFLOW_ERROR);
    return Err(ToICUError(status));
  }

  // Reserve once up front so that the appends below cannot fail.
  if (!aExtensions.reserve(count)) {
    return Err(ICUError::OutOfMemory);
  }

  for (int32_t idx = 0; idx < count; ++idx) {
    aExtensions.infallibleAppend(icuCodes[idx]);
  }

  return Ok();
}
} // namespace mozilla::intl

View file

@ -0,0 +1,55 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_Script_h_
#define intl_components_Script_h_
#include "mozilla/intl/ICU4CGlue.h"
#include "mozilla/Vector.h"
namespace mozilla::intl {
// The code point which has the most script extensions is 0x0965, which has 21
// script extensions, so choose the vector size as 32 to prevent heap
// allocation.
constexpr size_t kMaxScripts = 32;
// The list of script extensions, it consists of one or more script codes from
// ISO 15924, or mozilla::unicode::Script.
//
// Choose the element type as int16_t to have the same size of
// mozilla::unicode::Script.
// We didn't use mozilla::unicode::Script directly here because we cannot
// include the header in standalone JS shell build.
using ScriptExtensionVector = Vector<int16_t, kMaxScripts>;
/**
* This component is a Mozilla-focused API for working with Unicode scripts.
*/
class Script final {
public:
/**
* Get the script extensions for the given code point, and write the script
* extensions to aExtensions vector. If the code point has script extensions,
* the script code (Script::COMMON or Script::INHERITED) will be excluded.
*
* If the code point doesn't have any script extension, then its script code
* will be written to aExtensions vector.
*
* If the code point is invalid, Script::UNKNOWN will be written to
* aExtensions vector.
*
* Note: aExtensions is always cleared by this method, so its previous
* contents are lost even when the call fails.
*
* See [1] for the script code of the code point, [2] for the script
* extensions.
*
* [1] https://www.unicode.org/Public/UNIDATA/Scripts.txt
* [2] https://www.unicode.org/Public/UNIDATA/ScriptExtensions.txt
*
* @param aCodePoint  The code point to look up.
* @param aExtensions Output vector receiving the script codes.
* @return Ok() on success, otherwise an ICUError.
*/
static ICUResult GetExtensions(char32_t aCodePoint,
ScriptExtensionVector& aExtensions);
};
} // namespace mozilla::intl
#endif // intl_components_Script_h_

View file

@ -1,306 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef intl_components_UnicodeProperties_h_
#define intl_components_UnicodeProperties_h_
#include "mozilla/intl/BidiClass.h"
#include "mozilla/intl/ICU4CGlue.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/Vector.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
namespace mozilla::intl {
/**
 * This component is a Mozilla-focused API for working with text properties.
 *
 * Every member is a static helper that forwards to the corresponding ICU4C
 * u_* / uscript_* function; the class holds no state.
 */
class UnicodeProperties final {
 public:
  /**
   * Return the BidiClass for the character.
   *
   * The ICU UCharDirection value is cast directly, so BidiClass must stay
   * numerically identical to UCharDirection.
   */
  static inline BidiClass GetBidiClass(uint32_t aCh) {
    return BidiClass(u_charDirection(aCh));
  }

  /**
   * Maps the specified character to a "mirror-image" character.
   */
  static inline uint32_t CharMirror(uint32_t aCh) { return u_charMirror(aCh); }

  /**
   * Return the general category value for the code point.
   */
  static inline uint32_t CharType(uint32_t aCh) { return u_charType(aCh); }

  /**
   * Determine whether the code point has the Bidi_Mirrored property.
   */
  static inline bool IsMirrored(uint32_t aCh) { return u_isMirrored(aCh); }

  /**
   * Returns the combining class of the code point as specified in
   * UnicodeData.txt.
   */
  static inline uint8_t GetCombiningClass(uint32_t aCh) {
    return u_getCombiningClass(aCh);
  }

  enum class IntProperty {
    BidiPairedBracketType,
    EastAsianWidth,
    HangulSyllableType,
    LineBreak,
    NumericType,
  };

  /**
   * Get the property value for an enumerated or integer Unicode property for a
   * code point.
   */
  static inline int32_t GetIntPropertyValue(uint32_t aCh, IntProperty aProp) {
    // Start from ICU's "no such property" selector so that `prop` is never
    // read uninitialized if aProp somehow holds a value outside the enum.
    // ICU documents an out-of-range selector as yielding 0.
    UProperty prop = UCHAR_INVALID_CODE;
    switch (aProp) {
      case IntProperty::BidiPairedBracketType:
        prop = UCHAR_BIDI_PAIRED_BRACKET_TYPE;
        break;
      case IntProperty::EastAsianWidth:
        prop = UCHAR_EAST_ASIAN_WIDTH;
        break;
      case IntProperty::HangulSyllableType:
        prop = UCHAR_HANGUL_SYLLABLE_TYPE;
        break;
      case IntProperty::LineBreak:
        prop = UCHAR_LINE_BREAK;
        break;
      case IntProperty::NumericType:
        prop = UCHAR_NUMERIC_TYPE;
        break;
    }
    return u_getIntPropertyValue(aCh, prop);
  }

  /**
   * Get the numeric value for a Unicode code point as defined in the
   * Unicode Character Database if the input is decimal or a digit,
   * otherwise, returns -1.
   */
  static inline int8_t GetNumericValue(uint32_t aCh) {
    UNumericType type =
        UNumericType(GetIntPropertyValue(aCh, IntProperty::NumericType));
    return type == U_NT_DECIMAL || type == U_NT_DIGIT
               ? int8_t(u_getNumericValue(aCh))
               : -1;
  }

  /**
   * Maps the specified character to its paired bracket character.
   */
  static inline uint32_t GetBidiPairedBracket(uint32_t aCh) {
    return u_getBidiPairedBracket(aCh);
  }

  /**
   * The given character is mapped to its uppercase equivalent according to
   * UnicodeData.txt; if the character has no uppercase equivalent, the
   * character itself is returned.
   */
  static inline uint32_t ToUpper(uint32_t aCh) { return u_toupper(aCh); }

  /**
   * The given character is mapped to its lowercase equivalent according to
   * UnicodeData.txt; if the character has no lowercase equivalent, the
   * character itself is returned.
   */
  static inline uint32_t ToLower(uint32_t aCh) { return u_tolower(aCh); }

  /**
   * Check if a code point has the Lowercase Unicode property.
   */
  static inline bool IsLowercase(uint32_t aCh) { return u_isULowercase(aCh); }

  /**
   * The given character is mapped to its titlecase equivalent according to
   * UnicodeData.txt; if the character has no titlecase equivalent, the
   * character itself is returned.
   */
  static inline uint32_t ToTitle(uint32_t aCh) { return u_totitle(aCh); }

  /**
   * The given character is mapped to its case folding equivalent according to
   * UnicodeData.txt and CaseFolding.txt;
   * if the character has no case folding equivalent, the character
   * itself is returned.
   */
  static inline uint32_t FoldCase(uint32_t aCh) {
    return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
  }

  enum class BinaryProperty {
    DefaultIgnorableCodePoint,
    Emoji,
    EmojiPresentation,
  };

  /**
   * Check a binary Unicode property for a code point.
   */
  static inline bool HasBinaryProperty(uint32_t aCh, BinaryProperty aProp) {
    // As in GetIntPropertyValue: never hand ICU an indeterminate selector.
    // ICU documents an out-of-range selector as yielding false.
    UProperty prop = UCHAR_INVALID_CODE;
    switch (aProp) {
      case BinaryProperty::DefaultIgnorableCodePoint:
        prop = UCHAR_DEFAULT_IGNORABLE_CODE_POINT;
        break;
      case BinaryProperty::Emoji:
        prop = UCHAR_EMOJI;
        break;
      case BinaryProperty::EmojiPresentation:
        prop = UCHAR_EMOJI_PRESENTATION;
        break;
    }
    return u_hasBinaryProperty(aCh, prop);
  }

  /**
   * Check if the width of aCh is full width, half width or wide
   * excluding emoji.
   */
  static inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
    switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
      case U_EA_FULLWIDTH:
      case U_EA_HALFWIDTH:
        return true;
      case U_EA_WIDE:
        // Wide characters count only when they are not emoji.
        return !HasBinaryProperty(aCh, BinaryProperty::Emoji);
      case U_EA_AMBIGUOUS:
      case U_EA_NARROW:
      case U_EA_NEUTRAL:
        return false;
    }
    return false;
  }

  /**
   * Check if the width of aCh is ambiguous, full width, or wide.
   */
  static inline bool IsEastAsianWidthAFW(uint32_t aCh) {
    switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
      case U_EA_AMBIGUOUS:
      case U_EA_FULLWIDTH:
      case U_EA_WIDE:
        return true;
      case U_EA_HALFWIDTH:
      case U_EA_NARROW:
      case U_EA_NEUTRAL:
        return false;
    }
    return false;
  }

  /**
   * Check if the width of aCh is full width, or wide.
   */
  static inline bool IsEastAsianWidthFW(uint32_t aCh) {
    switch (GetIntPropertyValue(aCh, IntProperty::EastAsianWidth)) {
      case U_EA_FULLWIDTH:
      case U_EA_WIDE:
        return true;
      case U_EA_AMBIGUOUS:
      case U_EA_HALFWIDTH:
      case U_EA_NARROW:
      case U_EA_NEUTRAL:
        return false;
    }
    return false;
  }

  /**
   * Check if the CharType of aCh is math or other symbol.
   */
  static inline bool IsMathOrMusicSymbol(uint32_t aCh) {
    // Keep this function in sync with is_math_symbol in base_chars.py.
    const uint32_t type = CharType(aCh);
    return type == U_MATH_SYMBOL || type == U_OTHER_SYMBOL;
  }

  /**
   * Return the script code that uscript_getScript reports for the code point.
   */
  static inline Script GetScriptCode(uint32_t aCh) {
    // We can safely ignore the error code here because uscript_getScript
    // returns USCRIPT_INVALID_CODE in the event of an error.
    UErrorCode err = U_ZERO_ERROR;
    return Script(uscript_getScript(aCh, &err));
  }

  /**
   * Return the result of uscript_hasScript for the code point and script.
   */
  static inline bool HasScript(uint32_t aCh, Script aScript) {
    return uscript_hasScript(aCh, UScriptCode(aScript));
  }

  /**
   * Return the short name that uscript_getShortName reports for the script.
   */
  static inline const char* GetScriptShortName(Script aScript) {
    return uscript_getShortName(UScriptCode(aScript));
  }

  /**
   * Return the maximum value of the UCHAR_SCRIPT property known to ICU.
   *
   * Note that this is the largest script code, not a count.
   */
  static inline int32_t GetMaxNumberOfScripts() {
    return u_getIntPropertyMaxValue(UCHAR_SCRIPT);
  }

  // The code point which has the most script extensions is 0x0965, which has 21
  // script extensions, so choose the vector size as 32 to prevent heap
  // allocation.
  static constexpr size_t kMaxScripts = 32;

  using ScriptExtensionVector = Vector<Script, kMaxScripts>;

  /**
   * Get the script extensions for the given code point, and write the script
   * extensions to aExtensions vector. If the code point has script extensions,
   * the script code (Script::COMMON or Script::INHERITED) will be excluded.
   *
   * If the code point doesn't have any script extension, then its script code
   * will be written to aExtensions vector.
   *
   * If the code point is invalid, Script::UNKNOWN will be written to
   * aExtensions vector.
   *
   * Note: aExtensions is always cleared first, so its previous contents are
   * lost even when this method fails.
   *
   * See [1] for the script code of the code point, [2] for the script
   * extensions.
   *
   * [1] https://www.unicode.org/Public/UNIDATA/Scripts.txt
   * [2] https://www.unicode.org/Public/UNIDATA/ScriptExtensions.txt
   */
  static ICUResult GetExtensions(char32_t aCodePoint,
                                 ScriptExtensionVector& aExtensions) {
    // Clear the vector first.
    aExtensions.clear();

    // We cannot pass aExtensions to uscript_getScriptExtensions as UScriptCode
    // takes 4 bytes, so create a local UScriptCode array to get the
    // extensions.
    UScriptCode ext[kMaxScripts];
    UErrorCode status = U_ZERO_ERROR;
    int32_t len = uscript_getScriptExtensions(static_cast<UChar32>(aCodePoint),
                                              ext, kMaxScripts, &status);
    if (U_FAILURE(status)) {
      // kMaxScripts should be large enough to hold the maximum number of
      // script extensions.
      MOZ_DIAGNOSTIC_ASSERT(status != U_BUFFER_OVERFLOW_ERROR);
      return Err(ToICUError(status));
    }

    // Reserve up front so the appends below cannot fail.
    if (!aExtensions.reserve(len)) {
      return Err(ICUError::OutOfMemory);
    }

    for (int32_t i = 0; i < len; i++) {
      aExtensions.infallibleAppend(Script(ext[i]));
    }

    return Ok();
  }
};
} // namespace mozilla::intl
#endif

View file

@ -11,7 +11,6 @@
#include "nsUnicodeProperties.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
using namespace mozilla::unicode;
using namespace mozilla::intl;
@ -463,12 +462,10 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (aIsChineseOrJapanese) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
return CLASS_OPEN_LIKE_CHARACTER;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
@ -488,12 +485,10 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (aIsChineseOrJapanese) {
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
return CLASS_CLOSE_LIKE_CHARACTER;
}
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
return CLASS_OPEN_LIKE_CHARACTER;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
@ -518,12 +513,10 @@ static int8_t GetClass(uint32_t u, LineBreakRule aLevel,
u == 0xFF01 || u == 0xFF1F) {
return CLASS_BREAKABLE;
}
if (cls == U_LB_POSTFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
return CLASS_BREAKABLE;
}
if (cls == U_LB_PREFIX_NUMERIC &&
UnicodeProperties::IsEastAsianWidthAFW(u)) {
if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
return CLASS_BREAKABLE;
}
if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {

View file

@ -3,17 +3,17 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/WordBreaker.h"
#include "mozilla/StaticPrefs_layout.h"
#include "nsComplexBreaker.h"
#include "nsTArray.h"
#include "nsUnicodeProperties.h"
using mozilla::intl::Script;
using mozilla::intl::UnicodeProperties;
using mozilla::intl::WordBreaker;
using mozilla::intl::WordRange;
using mozilla::unicode::GetGenCategory;
using mozilla::unicode::GetScriptCode;
using mozilla::unicode::Script;
#define IS_ASCII(c) (0 == (0xFF80 & (c)))
#define ASCII_IS_ALPHA(c) \
@ -40,7 +40,7 @@ using mozilla::unicode::GetGenCategory;
// the script is not supported by the platform, we just won't find any useful
// boundaries.)
static bool IsScriptioContinua(char16_t aChar) {
Script sc = UnicodeProperties::GetScriptCode(aChar);
Script sc = GetScriptCode(aChar);
return sc == Script::THAI || sc == Script::MYANMAR || sc == Script::KHMER ||
sc == Script::JAVANESE || sc == Script::BALINESE ||
sc == Script::SUNDANESE || sc == Script::LAO;

View file

@ -47,7 +47,7 @@
# This will generate (or overwrite!) the files
#
# nsUnicodePropertyData.cpp
# UnicodeScriptCodes.h
# nsUnicodeScriptCodes.h
#
# in the current directory.
@ -71,7 +71,7 @@ if ($#ARGV != 1) {
# This will generate (or overwrite!) the files
#
# nsUnicodePropertyData.cpp
# UnicodeScriptCodes.h
# nsUnicodeScriptCodes.h
#
# in the current directory.
__EOT
@ -132,7 +132,7 @@ my %idType = (
"Deprecated" => 12
);
# These match the IdentifierType enum in UnicodeProperties.h.
# These match the IdentifierType enum in nsUnicodeProperties.h.
my %mappedIdType = (
"Restricted" => 0,
"Allowed" => 1
@ -292,7 +292,9 @@ my $timestamp = gmtime();
open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output";
my $licenseBlock = q[/* This Source Code Form is subject to the terms of the Mozilla Public
my $licenseBlock = q[
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
@ -322,7 +324,7 @@ $versionInfo
__END
open HEADER, "> UnicodeScriptCodes.h" or die "unable to open UnicodeScriptCodes.h for output";
open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";
print HEADER <<__END;
$licenseBlock
@ -336,8 +338,8 @@ $versionInfo
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
#ifndef intl_components_UnicodeScriptCodes_h_
#define intl_components_UnicodeScriptCodes_h_
#ifndef NS_UNICODE_SCRIPT_CODES
#define NS_UNICODE_SCRIPT_CODES
__END
@ -349,7 +351,16 @@ sub sprintCharProps2_short
return sprintf("{%d,%d},",
$verticalOrientation[$usv], $idtype[$usv]);
}
&genTables("CharProp2", "", "nsCharProps2", 9, 7, \&sprintCharProps2_short, 16, 1, 1);
my $type = q|
struct nsCharProps2 {
// Currently only 4 bits are defined here, so 4 more could be added without
// affecting the storage requirements for this struct. Or we could pack two
// records per byte, at the cost of a slightly more complex accessor.
unsigned char mVertOrient:2;
unsigned char mIdType:2;
};
|;
&genTables("CharProp2", $type, "nsCharProps2", 9, 7, \&sprintCharProps2_short, 16, 1, 1);
sub sprintHanVariants
{
@ -474,7 +485,8 @@ __END
close DATA_TABLES;
print HEADER "namespace mozilla::intl {\n";
print HEADER "namespace mozilla {\n";
print HEADER "namespace unicode {\n";
print HEADER "enum class Script : int16_t {\n";
for (my $i = 0; $i < scalar @scriptCodeToName; ++$i) {
print HEADER " ", $scriptCodeToName[$i], " = ", $i, ",\n";
@ -482,7 +494,15 @@ for (my $i = 0; $i < scalar @scriptCodeToName; ++$i) {
print HEADER "\n NUM_SCRIPT_CODES = ", scalar @scriptCodeToName, ",\n";
print HEADER "\n INVALID = -1\n";
print HEADER "};\n";
print HEADER "} // namespace mozilla::intl\n\n";
print HEADER <<__END;
// mozilla::intl::ScriptExtensionVector assumes sizeof(Script) is equal to
// sizeof(int16_t), so if the data type of Script is changed then
// ScriptExtensionVector needs to be updated accordingly.
static_assert(sizeof(Script) == sizeof(int16_t));
__END
print HEADER "} // namespace unicode\n";
print HEADER "} // namespace mozilla\n\n";
print HEADER <<__END;
#endif

View file

@ -12,6 +12,7 @@ EXPORTS += [
"nsSpecialCasingData.h",
"nsUnicharUtils.h",
"nsUnicodeProperties.h",
"nsUnicodeScriptCodes.h",
]
UNIFIED_SOURCES += [

View file

@ -6,24 +6,63 @@
#ifndef nsBidiUtils_h__
#define nsBidiUtils_h__
#include "mozilla/intl/BidiClass.h"
#include "nsString.h"
#include "encoding_rs_mem.h"
/**
* Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
* section BIDIRECTIONAL PROPERTIES
* for the detailed definition of the following categories
*
* The values here must match the equivalents in %bidicategorycode in
* mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
* and must also match the values used by ICU's UCharDirection.
*/
// Each enumerator is annotated with the abbreviation of the corresponding
// Unicode Bidi_Class value. The explicit numeric values are significant (they
// mirror ICU's UCharDirection), so do not reorder or renumber them.
enum nsCharType {
eCharType_LeftToRight = 0,  // L
eCharType_RightToLeft = 1,  // R
eCharType_EuropeanNumber = 2,  // EN
eCharType_EuropeanNumberSeparator = 3,  // ES
eCharType_EuropeanNumberTerminator = 4,  // ET
eCharType_ArabicNumber = 5,  // AN
eCharType_CommonNumberSeparator = 6,  // CS
eCharType_BlockSeparator = 7,  // B
eCharType_SegmentSeparator = 8,  // S
eCharType_WhiteSpaceNeutral = 9,  // WS
eCharType_OtherNeutral = 10,  // ON
eCharType_LeftToRightEmbedding = 11,  // LRE
eCharType_LeftToRightOverride = 12,  // LRO
eCharType_RightToLeftArabic = 13,  // AL
eCharType_RightToLeftEmbedding = 14,  // RLE
eCharType_RightToLeftOverride = 15,  // RLO
eCharType_PopDirectionalFormat = 16,  // PDF
eCharType_DirNonSpacingMark = 17,  // NSM
eCharType_BoundaryNeutral = 18,  // BN
eCharType_FirstStrongIsolate = 19,  // FSI
eCharType_LeftToRightIsolate = 20,  // LRI
eCharType_RightToLeftIsolate = 21,  // RLI
eCharType_PopDirectionalIsolate = 22,  // PDI
// Implicitly 23: the number of enumerators above, not a character type.
eCharType_CharTypeCount
};
/**
* This specifies the language directional property of a character set.
*/
typedef enum nsCharType nsCharType;
/**
* definitions of bidirection character types by category
*/
#define BIDICLASS_IS_RTL(val) \
(((val) == mozilla::intl::BidiClass::RightToLeft) || \
((val) == mozilla::intl::BidiClass::RightToLeftArabic))
// True when val is eCharType_RightToLeft or eCharType_RightToLeftArabic.
// val is expanded twice, so avoid passing an expression with side effects.
#define CHARTYPE_IS_RTL(val) \
(((val) == eCharType_RightToLeft) || ((val) == eCharType_RightToLeftArabic))
#define BIDICLASS_IS_WEAK(val) \
(((val) == mozilla::intl::BidiClass::EuropeanNumberSeparator) || \
((val) == mozilla::intl::BidiClass::EuropeanNumberTerminator) || \
(((val) > mozilla::intl::BidiClass::ArabicNumber) && \
((val) != mozilla::intl::BidiClass::RightToLeftArabic)))
// True when val is eCharType_EuropeanNumberSeparator or
// eCharType_EuropeanNumberTerminator, or when val is numerically greater than
// eCharType_ArabicNumber and is not eCharType_RightToLeftArabic. The ordering
// comparison depends on the numeric values assigned in nsCharType.
// val is expanded several times, so avoid passing an expression with side
// effects.
#define CHARTYPE_IS_WEAK(val) \
(((val) == eCharType_EuropeanNumberSeparator) || \
((val) == eCharType_EuropeanNumberTerminator) || \
(((val) > eCharType_ArabicNumber) && \
((val) != eCharType_RightToLeftArabic)))
/**
* Inspects a Unichar, converting numbers to Arabic or Hindi forms and

View file

@ -5,9 +5,9 @@
#include "nsUnicharUtils.h"
#include "nsUTF8Utils.h"
#include "nsUnicodeProperties.h"
#include "mozilla/Likely.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/intl/UnicodeProperties.h"
// We map x -> x, except for upper-case letters,
// which we map to their lower-case equivalents.
@ -33,7 +33,7 @@ static MOZ_ALWAYS_INLINE uint32_t ToLowerCase_inline(uint32_t aChar) {
return gASCIIToLower[aChar];
}
return mozilla::intl::UnicodeProperties::ToLower(aChar);
return mozilla::unicode::GetLowercase(aChar);
}
static MOZ_ALWAYS_INLINE uint32_t
@ -244,8 +244,7 @@ void ToLowerCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) {
for (uint32_t i = 0; i < aLen; i++) {
uint32_t ch = aIn[i];
if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) {
ch = mozilla::intl::UnicodeProperties::ToLower(
SURROGATE_TO_UCS4(ch, aIn[i + 1]));
ch = mozilla::unicode::GetLowercase(SURROGATE_TO_UCS4(ch, aIn[i + 1]));
NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!");
aOut[i++] = H_SURROGATE(ch);
aOut[i] = L_SURROGATE(ch);
@ -270,15 +269,14 @@ uint32_t ToUpperCase(uint32_t aChar) {
return aChar;
}
return mozilla::intl::UnicodeProperties::ToUpper(aChar);
return mozilla::unicode::GetUppercase(aChar);
}
void ToUpperCase(const char16_t* aIn, char16_t* aOut, uint32_t aLen) {
for (uint32_t i = 0; i < aLen; i++) {
uint32_t ch = aIn[i];
if (i < aLen - 1 && NS_IS_SURROGATE_PAIR(ch, aIn[i + 1])) {
ch = mozilla::intl::UnicodeProperties::ToUpper(
SURROGATE_TO_UCS4(ch, aIn[i + 1]));
ch = mozilla::unicode::GetUppercase(SURROGATE_TO_UCS4(ch, aIn[i + 1]));
NS_ASSERTION(!IS_IN_BMP(ch), "case mapping crossed BMP/SMP boundary!");
aOut[i++] = H_SURROGATE(ch);
aOut[i] = L_SURROGATE(ch);
@ -364,7 +362,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
// we don't go through ToLowerCase here, because we know this isn't
// an ASCII character so the ASCII fast-path there is useless
c = mozilla::intl::UnicodeProperties::ToLower(c);
c = mozilla::unicode::GetLowercase(c);
*aNext = aStr + 2;
return c;
@ -379,7 +377,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
c += (str[1] & 0x3F) << 6;
c += (str[2] & 0x3F);
c = mozilla::intl::UnicodeProperties::ToLower(c);
c = mozilla::unicode::GetLowercase(c);
*aNext = aStr + 3;
return c;
@ -394,7 +392,7 @@ static MOZ_ALWAYS_INLINE uint32_t GetLowerUTF8Codepoint_inline(
c += (str[2] & 0x3F) << 6;
c += (str[3] & 0x3F);
c = mozilla::intl::UnicodeProperties::ToLower(c);
c = mozilla::unicode::GetLowercase(c);
*aNext = aStr + 4;
return c;
@ -516,8 +514,8 @@ uint32_t HashUTF8AsUTF16(const char* aUTF8, uint32_t aLength, bool* aErr) {
}
bool IsSegmentBreakSkipChar(uint32_t u) {
return intl::UnicodeProperties::IsEastAsianWidthFHWexcludingEmoji(u) &&
intl::UnicodeProperties::GetScriptCode(u) != intl::Script::HANGUL;
return unicode::IsEastAsianWidthFHWexcludingEmoji(u) &&
unicode::GetScriptCode(u) != unicode::Script::HANGUL;
}
} // namespace mozilla

View file

@ -177,8 +177,7 @@ enum HSType {
};
static HSType GetHangulSyllableType(uint32_t aCh) {
return HSType(intl::UnicodeProperties::GetIntPropertyValue(
aCh, intl::UnicodeProperties::IntProperty::HangulSyllableType));
return HSType(u_getIntPropertyValue(aCh, UCHAR_HANGUL_SYLLABLE_TYPE));
}
void ClusterIterator::Next() {

View file

@ -7,19 +7,13 @@
#ifndef NS_UNICODEPROPERTIES_H
#define NS_UNICODEPROPERTIES_H
#include "mozilla/intl/UnicodeProperties.h"
#include "nsBidiUtils.h"
#include "nsUGenCategory.h"
#include "nsUnicodeScriptCodes.h"
#include "harfbuzz/hb.h"
struct nsCharProps2 {
// Currently only 4 bits are defined here, so 4 more could be added without
// affecting the storage requirements for this struct. Or we could pack two
// records per byte, at the cost of a slightly more complex accessor.
unsigned char mVertOrient : 2;
unsigned char mIdType : 2;
};
#include "unicode/uchar.h"
#include "unicode/uscript.h"
const nsCharProps2& GetCharProps2(uint32_t aCh);
@ -63,21 +57,45 @@ const uint32_t kEmojiSkinToneLast = 0x1f3ff;
extern const hb_unicode_general_category_t sICUtoHBcategory[];
inline uint32_t GetMirroredChar(uint32_t aCh) { return u_charMirror(aCh); }
inline bool HasMirroredChar(uint32_t aCh) { return u_isMirrored(aCh); }
// Returns the combining class that ICU's u_getCombiningClass reports for aCh.
inline uint8_t GetCombiningClass(uint32_t aCh) {
  const uint8_t combiningClass = u_getCombiningClass(aCh);
  return combiningClass;
}
inline uint8_t GetGeneralCategory(uint32_t aCh) {
return sICUtoHBcategory[intl::UnicodeProperties::CharType(aCh)];
return sICUtoHBcategory[u_charType(aCh)];
}
// Returns the bidi category of aCh by converting the value returned by ICU's
// u_charDirection to nsCharType; this relies on nsCharType using the same
// numeric values as ICU's direction enum.
inline nsCharType GetBidiCat(uint32_t aCh) {
  const auto icuDirection = u_charDirection(aCh);
  return static_cast<nsCharType>(icuDirection);
}
inline int8_t GetNumericValue(uint32_t aCh) {
return intl::UnicodeProperties::GetNumericValue(aCh);
UNumericType type =
UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
return type == U_NT_DECIMAL || type == U_NT_DIGIT
? int8_t(u_getNumericValue(aCh))
: -1;
}
inline uint8_t GetLineBreakClass(uint32_t aCh) {
return intl::UnicodeProperties::GetIntPropertyValue(
aCh, intl::UnicodeProperties::IntProperty::LineBreak);
return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
}
inline uint32_t GetScriptTagForCode(intl::Script aScriptCode) {
const char* tag = intl::UnicodeProperties::GetScriptShortName(aScriptCode);
// Returns the script of aCh as reported by ICU's uscript_getScript, converted
// to Script.
inline Script GetScriptCode(uint32_t aCh) {
  // ICU requires an error-code out-parameter; its value is not examined here,
  // only the returned script code is used.
  UErrorCode status = U_ZERO_ERROR;
  const UScriptCode icuScript = uscript_getScript(aCh, &status);
  return Script(icuScript);
}
inline bool HasScript(uint32_t aCh, Script aScript) {
return uscript_hasScript(aCh, UScriptCode(aScript));
}
inline uint32_t GetScriptTagForCode(Script aScriptCode) {
const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
if (tag) {
return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
}
@ -86,22 +104,28 @@ inline uint32_t GetScriptTagForCode(intl::Script aScriptCode) {
}
inline PairedBracketType GetPairedBracketType(uint32_t aCh) {
return PairedBracketType(intl::UnicodeProperties::GetIntPropertyValue(
aCh, intl::UnicodeProperties::IntProperty::BidiPairedBracketType));
return PairedBracketType(
u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
}
inline uint32_t GetPairedBracket(uint32_t aCh) {
return u_getBidiPairedBracket(aCh);
}
inline uint32_t GetUppercase(uint32_t aCh) { return u_toupper(aCh); }
inline uint32_t GetLowercase(uint32_t aCh) { return u_tolower(aCh); }
inline uint32_t GetTitlecaseForLower(
uint32_t aCh) // maps LC to titlecase, UC unchanged
{
return intl::UnicodeProperties::IsLowercase(aCh)
? intl::UnicodeProperties::ToTitle(aCh)
: aCh;
return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
}
inline uint32_t GetTitlecaseForAll(
uint32_t aCh) // maps both UC and LC to titlecase
{
return intl::UnicodeProperties::ToTitle(aCh);
return u_totitle(aCh);
}
inline uint32_t GetFoldedcase(uint32_t aCh) {
@ -111,22 +135,62 @@ inline uint32_t GetFoldedcase(uint32_t aCh) {
if (aCh == 0x0130 || aCh == 0x0131) {
return 'i';
}
return intl::UnicodeProperties::FoldCase(aCh);
return u_foldCase(aCh, U_FOLD_CASE_DEFAULT);
}
// Returns true when the East Asian Width of aCh is Fullwidth or Halfwidth, or
// when it is Wide and aCh does not have the Emoji binary property. Every other
// width value yields false.
inline bool IsEastAsianWidthFHWexcludingEmoji(uint32_t aCh) {
  const int32_t width = u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH);
  if (width == U_EA_FULLWIDTH || width == U_EA_HALFWIDTH) {
    return true;
  }
  if (width == U_EA_WIDE) {
    // Wide characters only count when they are not emoji.
    return !u_hasBinaryProperty(aCh, UCHAR_EMOJI);
  }
  // Ambiguous, Narrow, Neutral, and anything unrecognized.
  return false;
}
// Returns true when the East Asian Width of aCh is Ambiguous, Fullwidth or
// Wide; Halfwidth, Narrow, Neutral and any unrecognized value yield false.
inline bool IsEastAsianWidthAFW(uint32_t aCh) {
  const int32_t width = u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH);
  return width == U_EA_AMBIGUOUS || width == U_EA_FULLWIDTH ||
         width == U_EA_WIDE;
}
// Returns true when the East Asian Width of aCh is Fullwidth or Wide;
// Ambiguous, Halfwidth, Narrow, Neutral and any unrecognized value yield
// false.
inline bool IsEastAsianWidthFW(uint32_t aCh) {
  const int32_t width = u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH);
  return width == U_EA_FULLWIDTH || width == U_EA_WIDE;
}
inline bool IsDefaultIgnorable(uint32_t aCh) {
return intl::UnicodeProperties::HasBinaryProperty(
aCh, intl::UnicodeProperties::BinaryProperty::DefaultIgnorableCodePoint);
return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
}
inline EmojiPresentation GetEmojiPresentation(uint32_t aCh) {
if (!intl::UnicodeProperties::HasBinaryProperty(
aCh, intl::UnicodeProperties::BinaryProperty::Emoji)) {
if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
return TextOnly;
}
if (intl::UnicodeProperties::HasBinaryProperty(
aCh, intl::UnicodeProperties::BinaryProperty::EmojiPresentation)) {
if (u_hasBinaryProperty(aCh, UCHAR_EMOJI_PRESENTATION)) {
return EmojiDefault;
}
return TextDefault;
@ -207,6 +271,11 @@ uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);
// to the values we care about at runtime.
bool IsCombiningDiacritic(uint32_t aCh);
// Keep this function in sync with is_math_symbol in base_chars.py.
// Returns true when ICU classifies aCh as U_MATH_SYMBOL or U_OTHER_SYMBOL.
inline bool IsMathOrMusicSymbol(uint32_t aCh) {
  // Query the general category once rather than once per comparison.
  const auto category = u_charType(aCh);
  return category == U_MATH_SYMBOL || category == U_OTHER_SYMBOL;
}
// Remove diacritics from a character
uint32_t GetNaked(uint32_t aCh);

View file

@ -1,3 +1,5 @@
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
@ -9,7 +11,7 @@
*/
/*
* Created on Thu Nov 25 12:44:10 2021 from UCD data files with version info:
* Created on Fri Oct 29 09:00:15 2021 from UCD data files with version info:
*
# Unicode Character Database

View file

@ -1,3 +1,5 @@
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
@ -9,7 +11,7 @@
*/
/*
* Created on Thu Dec 2 15:20:26 2021 from UCD data files with version info:
* Created on Thu Nov 18 12:50:48 2021 from UCD data files with version info:
*
# Unicode Character Database
@ -43,10 +45,20 @@ for the Unicode Character Database, for Version 14.0.0 of the Unicode Standard.
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
#ifndef intl_components_UnicodeScriptCodes_h_
#define intl_components_UnicodeScriptCodes_h_
#ifndef NS_UNICODE_SCRIPT_CODES
#define NS_UNICODE_SCRIPT_CODES
namespace mozilla::intl {
struct nsCharProps2 {
// Currently only 4 bits are defined here, so 4 more could be added without
// affecting the storage requirements for this struct. Or we could pack two
// records per byte, at the cost of a slightly more complex accessor.
unsigned char mVertOrient:2;
unsigned char mIdType:2;
};
namespace mozilla {
namespace unicode {
enum class Script : int16_t {
COMMON = 0,
INHERITED = 1,
@ -251,7 +263,13 @@ enum class Script : int16_t {
INVALID = -1
};
} // namespace mozilla::intl
// mozilla::intl::ScriptExtensionVector assumes sizeof(Script) is equal to
// sizeof(int16_t), so if the data type of Script is changed then
// ScriptExtensionVector needs to be updated accordingly.
static_assert(sizeof(Script) == sizeof(int16_t));
} // namespace unicode
} // namespace mozilla
#endif
/*

View file

@ -1991,7 +1991,7 @@ void nsBidiPresUtils::RemoveBidiContinuation(BidiParagraphData* aBpd,
nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext,
char16_t* aText,
int32_t& aTextLength,
intl::BidiClass aBidiClass) {
nsCharType aCharType) {
nsresult rv = NS_OK;
// ahmed
// adjusted for correct numeral shaping
@ -2011,12 +2011,12 @@ nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext,
case IBMBIDI_NUMERAL_REGULAR:
switch (aBidiClass) {
case intl::BidiClass::EuropeanNumber:
switch (aCharType) {
case eCharType_EuropeanNumber:
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
break;
case intl::BidiClass::ArabicNumber:
case eCharType_ArabicNumber:
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
break;
@ -2029,22 +2029,20 @@ nsresult nsBidiPresUtils::FormatUnicodeText(nsPresContext* aPresContext,
if (((GET_BIDI_OPTION_DIRECTION(bidiOptions) ==
IBMBIDI_TEXTDIRECTION_RTL) &&
(IS_ARABIC_DIGIT(aText[0]))) ||
(intl::BidiClass::ArabicNumber == aBidiClass)) {
(eCharType_ArabicNumber == aCharType))
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
} else if (intl::BidiClass::EuropeanNumber == aBidiClass) {
else if (eCharType_EuropeanNumber == aCharType)
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
}
break;
case IBMBIDI_NUMERAL_PERSIANCONTEXT:
if (((GET_BIDI_OPTION_DIRECTION(bidiOptions) ==
IBMBIDI_TEXTDIRECTION_RTL) &&
(IS_ARABIC_DIGIT(aText[0]))) ||
(intl::BidiClass::ArabicNumber == aBidiClass)) {
(eCharType_ArabicNumber == aCharType))
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_PERSIAN);
} else if (intl::BidiClass::EuropeanNumber == aBidiClass) {
else if (eCharType_EuropeanNumber == aCharType)
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
}
break;
case IBMBIDI_NUMERAL_NOMINAL:
@ -2076,40 +2074,64 @@ void nsBidiPresUtils::StripBidiControlCharacters(char16_t* aText,
aTextLength -= stripLen;
}
void nsBidiPresUtils::CalculateBidiClass(
intl::Bidi* aBidiEngine, const char16_t* aText, int32_t& aOffset,
int32_t aBidiClassLimit, int32_t& aRunLimit, int32_t& aRunLength,
int32_t& aRunCount, intl::BidiClass& aBidiClass,
intl::BidiClass& aPrevBidiClass) {
#if 0 // XXX: for the future use ???
// Compiled out. Removes, in place, every character of aText for which
// IS_BIDI_DIACRITIC is true, stopping at the first zero code unit or after
// aTextLength code units, then updates aTextLength and writes a terminating
// zero.
// NOTE(review): the final write lands at aText[aTextLength], which is one past
// the original text when nothing was removed -- confirm the buffer has room
// for the terminator before enabling this code.
void
RemoveDiacritics(char16_t* aText,
                 int32_t& aTextLength)
{
  if (aText && (aTextLength > 0) ) {
    int32_t offset = 0;
    // Declared outside the loop because the final index is needed afterwards
    // to compute the new length.
    int32_t i;
    for (i = 0; i < aTextLength && aText[i]; i++) {
      if (IS_BIDI_DIACRITIC(aText[i]) ) {
        // Skip this character; later characters shift left by one more slot.
        ++offset;
        continue;
      }
      aText[i - offset] = aText[i];
    }
    aTextLength = i - offset;
    aText[aTextLength] = 0;
  }
}
#endif
void nsBidiPresUtils::CalculateCharType(intl::Bidi* aBidiEngine,
const char16_t* aText, int32_t& aOffset,
int32_t aCharTypeLimit,
int32_t& aRunLimit, int32_t& aRunLength,
int32_t& aRunCount, uint8_t& aCharType,
uint8_t& aPrevCharType)
{
bool strongTypeFound = false;
int32_t offset;
intl::BidiClass bidiClass;
nsCharType charType;
aBidiClass = intl::BidiClass::OtherNeutral;
aCharType = eCharType_OtherNeutral;
int32_t charLen;
for (offset = aOffset; offset < aBidiClassLimit; offset += charLen) {
for (offset = aOffset; offset < aCharTypeLimit; offset += charLen) {
// Make sure we give RTL chartype to all characters that would be classified
// as Right-To-Left by a bidi platform.
// (May differ from the UnicodeData, eg we set RTL chartype to some NSMs.)
charLen = 1;
uint32_t ch = aText[offset];
if (IS_HEBREW_CHAR(ch)) {
bidiClass = intl::BidiClass::RightToLeft;
charType = eCharType_RightToLeft;
} else if (IS_ARABIC_ALPHABETIC(ch)) {
bidiClass = intl::BidiClass::RightToLeftArabic;
charType = eCharType_RightToLeftArabic;
} else {
if (offset + 1 < aBidiClassLimit &&
if (offset + 1 < aCharTypeLimit &&
NS_IS_SURROGATE_PAIR(ch, aText[offset + 1])) {
ch = SURROGATE_TO_UCS4(ch, aText[offset + 1]);
charLen = 2;
}
bidiClass = intl::UnicodeProperties::GetBidiClass(ch);
charType = unicode::GetBidiCat(ch);
}
if (!BIDICLASS_IS_WEAK(bidiClass)) {
if (strongTypeFound && (bidiClass != aPrevBidiClass) &&
(BIDICLASS_IS_RTL(bidiClass) || BIDICLASS_IS_RTL(aPrevBidiClass))) {
if (!CHARTYPE_IS_WEAK(charType)) {
if (strongTypeFound && (charType != aPrevCharType) &&
(CHARTYPE_IS_RTL(charType) || CHARTYPE_IS_RTL(aPrevCharType))) {
// Stop at this point to ensure uni-directionality of the text
// (from platform's point of view).
// Also, don't mix Arabic and Hebrew content (since platform may
@ -2120,18 +2142,18 @@ void nsBidiPresUtils::CalculateBidiClass(
break;
}
if ((intl::BidiClass::RightToLeftArabic == aPrevBidiClass ||
intl::BidiClass::ArabicNumber == aPrevBidiClass) &&
intl::BidiClass::EuropeanNumber == bidiClass) {
bidiClass = intl::BidiClass::ArabicNumber;
if ((eCharType_RightToLeftArabic == aPrevCharType ||
eCharType_ArabicNumber == aPrevCharType) &&
eCharType_EuropeanNumber == charType) {
charType = eCharType_ArabicNumber;
}
// Set PrevBidiClass to the last strong type in this frame
// Set PrevCharType to the last strong type in this frame
// (for correct numeric shaping)
aPrevBidiClass = bidiClass;
aPrevCharType = charType;
strongTypeFound = true;
aBidiClass = bidiClass;
aCharType = charType;
}
}
aOffset = offset;
@ -2166,8 +2188,8 @@ nsresult nsBidiPresUtils::ProcessText(const char16_t* aText, size_t aLength,
nscoord totalWidth = 0;
int32_t i, start, limit, length;
uint32_t visualStart = 0;
intl::BidiClass bidiClass;
intl::BidiClass prevClass = intl::BidiClass::LeftToRight;
uint8_t charType;
uint8_t prevType = eCharType_LeftToRight;
for (int nPosResolve = 0; nPosResolve < aPosResolveCount; ++nPosResolve) {
aPosResolve[nPosResolve].visualIndex = kNotFound;
@ -2209,17 +2231,17 @@ nsresult nsBidiPresUtils::ProcessText(const char16_t* aText, size_t aLength,
}
while (subRunCount > 0) {
// CalculateBidiClass can increment subRunCount if the run
// CalculateCharType can increment subRunCount if the run
// contains mixed character types
CalculateBidiClass(aBidiEngine, text, lineOffset, typeLimit, subRunLimit,
subRunLength, subRunCount, bidiClass, prevClass);
CalculateCharType(aBidiEngine, text, lineOffset, typeLimit, subRunLimit,
subRunLength, subRunCount, charType, prevType);
nsAutoString runVisualText;
runVisualText.Assign(text + start, subRunLength);
if (int32_t(runVisualText.Length()) < subRunLength)
return NS_ERROR_OUT_OF_MEMORY;
FormatUnicodeText(aPresContext, runVisualText.BeginWriting(),
subRunLength, bidiClass);
subRunLength, (nsCharType)charType);
aprocessor.SetText(runVisualText.get(), subRunLength, dir);
width = aprocessor.GetWidth();

View file

@ -8,7 +8,6 @@
#define nsBidiPresUtils_h___
#include "gfxContext.h"
#include "mozilla/intl/BidiClass.h"
#include "mozilla/intl/BidiEmbeddingLevel.h"
#include "nsBidiUtils.h"
#include "nsHashKeys.h"
@ -224,7 +223,7 @@ class nsBidiPresUtils {
*/
static nsresult FormatUnicodeText(nsPresContext* aPresContext,
char16_t* aText, int32_t& aTextLength,
mozilla::intl::BidiClass aBidiClass);
nsCharType aCharType);
/**
* Reorder plain text using the Unicode Bidi algorithm and send it to
@ -563,12 +562,11 @@ class nsBidiPresUtils {
*/
static void RemoveBidiContinuation(BidiParagraphData* aBpd, nsIFrame* aFrame,
int32_t aFirstIndex, int32_t aLastIndex);
static void CalculateBidiClass(mozilla::intl::Bidi* aBidiEngine,
const char16_t* aText, int32_t& aOffset,
int32_t aBidiClassLimit, int32_t& aRunLimit,
int32_t& aRunLength, int32_t& aRunCount,
mozilla::intl::BidiClass& aBidiClass,
mozilla::intl::BidiClass& aPrevBidiClass);
static void CalculateCharType(mozilla::intl::Bidi* aBidiEngine,
const char16_t* aText, int32_t& aOffset,
int32_t aCharTypeLimit, int32_t& aRunLimit,
int32_t& aRunLength, int32_t& aRunCount,
uint8_t& aCharType, uint8_t& aPrevCharType);
static void StripBidiControlCharacters(char16_t* aText, int32_t& aTextLength);
};

View file

@ -10,12 +10,12 @@
#include "mozilla/BinarySearch.h"
#include "mozilla/ComputedStyle.h"
#include "mozilla/ComputedStyleInlines.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "nsStyleConsts.h"
#include "nsTextFrameUtils.h"
#include "nsFontMetrics.h"
#include "nsDeviceContext.h"
#include "nsUnicodeScriptCodes.h"
using namespace mozilla;
@ -564,7 +564,7 @@ void MathMLTextRunFactory::RebuildTextRun(
// character is actually available.
FontMatchType matchType;
RefPtr<gfxFont> mathFont = fontGroup->FindFontForChar(
ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType);
ch2, 0, 0, unicode::Script::COMMON, nullptr, &matchType);
if (mathFont) {
// Don't apply the CSS style if there is a math font for at least one
// of the transformed character in this text run.
@ -573,7 +573,7 @@ void MathMLTextRunFactory::RebuildTextRun(
// We fallback to the original character.
ch2 = ch;
if (aMFR) {
aMFR->RecordScript(intl::Script::MATHEMATICAL_NOTATION);
aMFR->RecordScript(unicode::Script::MATHEMATICAL_NOTATION);
}
}
}

View file

@ -8370,8 +8370,8 @@ static bool FindFirstLetterRange(const nsTextFragment* aFrag,
// want to allow this to split a ligature.
bool allowSplitLigature;
typedef intl::Script Script;
Script script = intl::UnicodeProperties::GetScriptCode(usv);
typedef unicode::Script Script;
Script script = unicode::GetScriptCode(usv);
switch (script) {
default:
allowSplitLigature = true;

View file

@ -11,7 +11,6 @@
#include "gfxUtils.h"
#include "mozilla/dom/Document.h"
#include "mozilla/gfx/2D.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/ComputedStyle.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/UniquePtr.h"
@ -43,6 +42,7 @@
#include <algorithm>
#include "gfxMathTable.h"
#include "nsUnicodeScriptCodes.h"
using namespace mozilla;
using namespace mozilla::gfx;
@ -1541,7 +1541,7 @@ nsresult nsMathMLChar::StretchInternal(
// and record missing math script otherwise.
gfxMissingFontRecorder* MFR = presContext->MissingFontRecorder();
if (MFR && !fm->GetThebesFontGroup()->GetFirstMathFont()) {
MFR->RecordScript(intl::Script::MATHEMATICAL_NOTATION);
MFR->RecordScript(unicode::Script::MATHEMATICAL_NOTATION);
}
// If the scale_stretchy_operators option is disabled, we are done.

View file

@ -11,14 +11,14 @@
#include "nsServiceManagerUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"
#include "nsUnicodeScriptCodes.h"
#include "harfbuzz/hb.h"
#include "punycode.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Casting.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/intl/Script.h"
// Currently we use the non-transitional processing option -- see
// http://unicode.org/reports/tr46/
@ -30,7 +30,6 @@ const bool kIDNA2008_TransitionalProcessing = false;
#include "ICUUtils.h"
using namespace mozilla;
using namespace mozilla::intl;
using namespace mozilla::unicode;
using namespace mozilla::net;
using mozilla::Preferences;
@ -764,7 +763,7 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
MOZ_ASSERT(idType == IDTYPE_ALLOWED);
// Check for mixed script
Script script = UnicodeProperties::GetScriptCode(ch);
Script script = GetScriptCode(ch);
if (script != Script::COMMON && script != Script::INHERITED &&
script != lastScript) {
if (illegalScriptCombo(script, savedScript)) {
@ -775,8 +774,7 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
// Check for mixed numbering systems
auto genCat = GetGeneralCategory(ch);
if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
uint32_t zeroCharacter =
ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
uint32_t zeroCharacter = ch - GetNumericValue(ch);
if (savedNumberingSystem == 0) {
// If we encounter a decimal number, save the zero character from that
// numbering system.
@ -793,8 +791,8 @@ bool nsIDNService::isLabelSafe(const nsAString& label) {
}
// Check for marks whose expected script doesn't match the base script.
if (lastScript != Script::INVALID) {
UnicodeProperties::ScriptExtensionVector scripts;
auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
mozilla::intl::ScriptExtensionVector scripts;
auto extResult = mozilla::intl::Script::GetExtensions(ch, scripts);
MOZ_ASSERT(extResult.isOk());
if (extResult.isErr()) {
return false;

View file

@ -8,11 +8,11 @@
#include "nsIIDNService.h"
#include "nsCOMPtr.h"
#include "nsUnicodeScriptCodes.h"
#include "nsWeakReference.h"
#include "unicode/uidna.h"
#include "mozilla/Mutex.h"
#include "mozilla/intl/UnicodeScriptCodes.h"
#include "mozilla/net/IDNBlocklistUtils.h"
#include "nsString.h"
@ -147,7 +147,8 @@ class nsIDNService final : public nsIIDNService,
* For the "Moderately restrictive" profile, Latin is also allowed
* with other scripts except Cyrillic and Greek
*/
bool illegalScriptCombo(mozilla::intl::Script script, int32_t& savedScript);
bool illegalScriptCombo(mozilla::unicode::Script script,
int32_t& savedScript);
/**
* Convert a DNS label from ASCII to Unicode using IDNA2008

View file

@ -31,7 +31,6 @@
#include "mozilla/dom/HTMLOptionElement.h"
#include "mozilla/dom/HTMLSelectElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/WordBreaker.h"
#include "mozilla/StaticPrefs_browser.h"
@ -776,7 +775,7 @@ nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,
// already guaranteed to not be a combining diacritical mark.)
c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex]));
if (!mMatchDiacritics && IsCombiningDiacritic(c) &&
!intl::UnicodeProperties::IsMathOrMusicSymbol(prevChar)) {
!IsMathOrMusicSymbol(prevChar)) {
continue;
}
patc = DecodeChar(patStr, &pindex);

View file

@ -5,7 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsUnicodeProperties.h"
// Verify the assertion in SQLFunctions.cpp / nextSearchCandidate that the
// only non-ASCII characters that lower-case to ASCII ones are:
@ -15,7 +15,7 @@ TEST(MatchAutocompleteCasing, CaseAssumption)
{
for (uint32_t c = 128; c < 0x110000; c++) {
if (c != 304 && c != 8490) {
ASSERT_GE(mozilla::intl::UnicodeProperties::ToLower(c), 128U);
ASSERT_GE(mozilla::unicode::GetLowercase(c), 128U);
}
}
}
@ -24,6 +24,6 @@ TEST(MatchAutocompleteCasing, CaseAssumption)
TEST(MatchAutocompleteCasing, CaseAssumption2)
{
for (uint32_t c = 0; c < 128; c++) {
ASSERT_LT(mozilla::intl::UnicodeProperties::ToLower(c), 128U);
ASSERT_LT(mozilla::unicode::GetLowercase(c), 128U);
}
}

View file

@ -6,6 +6,6 @@ devtools/client/debugger/node_modules/
dom/tests/ajax/jquery/
dom/tests/ajax/mochikit/
node_modules/
intl/components/src/UnicodeScriptCodes.h
intl/unicharutil/util/nsSpecialCasingData.cpp
intl/unicharutil/util/nsUnicodePropertyData.cpp
intl/unicharutil/util/nsUnicodeScriptCodes.h