Bug 1886953 - Introduce deCOMtaminated IDNA functions and benchmark them r=necko-reviewers,valentin

The plan is to replace the implementations of these functions with an
ICU4X/Rust back end and then make the XPCOM versions call into these.

Migrating existing C++ callers is out of scope for this changeset.

Differential Revision: https://phabricator.services.mozilla.com/D205420
This commit is contained in:
Henri Sivonen 2024-04-03 06:43:28 +00:00
parent 765b7d2fc0
commit 8eb5a3e1d1
6 changed files with 117 additions and 14 deletions

View file

@ -693,17 +693,6 @@ int32_t NS_GetDefaultPort(const char* scheme,
return NS_SUCCEEDED(rv) ? port : -1;
}
/**
* This function is a helper function to apply the ToAscii conversion
* to a string
*/
bool NS_StringToACE(const nsACString& idn, nsACString& result) {
nsCOMPtr<nsIIDNService> idnSrv = do_GetService(NS_IDNSERVICE_CONTRACTID);
if (!idnSrv) return false;
nsresult rv = idnSrv->ConvertUTF8toACE(idn, result);
return NS_SUCCEEDED(rv);
}
int32_t NS_GetRealPort(nsIURI* aURI) {
int32_t port;
nsresult rv = aURI->GetPort(&port);
@ -721,6 +710,20 @@ int32_t NS_GetRealPort(nsIURI* aURI) {
return NS_GetDefaultPort(scheme.get());
}
/**
 * Converts a UTF-8 host name to its ACE form by forwarding to the IDN
 * service's ConvertUTF8toACE.
 *
 * @param aHost   Host name to convert.
 * @param aASCII  Receives the converted host name.
 * @return NS_ERROR_NOT_AVAILABLE if nsStandardURL has no IDN service to hand
 *         out; otherwise the result of ConvertUTF8toACE.
 */
nsresult NS_DomainToASCII(const nsACString& aHost, nsACString& aASCII) {
  // The accessor returns a raw pointer and nothing here guarantees it is
  // non-null, so fail cleanly instead of dereferencing a null service.
  nsIIDNService* idn = nsStandardURL::GetIDNService();
  if (!idn) {
    return NS_ERROR_NOT_AVAILABLE;
  }
  return idn->ConvertUTF8toACE(aHost, aASCII);
}
/**
 * Converts a host name to its display form by forwarding to the IDN
 * service's ConvertToDisplayIDN.
 *
 * @param aHost     Host name to convert.
 * @param aDisplay  Receives the display form of the host name.
 * @return NS_ERROR_NOT_AVAILABLE if nsStandardURL has no IDN service to hand
 *         out; otherwise the result of ConvertToDisplayIDN.
 */
nsresult NS_DomainToDisplay(const nsACString& aHost, nsACString& aDisplay) {
  // The accessor returns a raw pointer and nothing here guarantees it is
  // non-null, so fail cleanly instead of dereferencing a null service.
  nsIIDNService* idn = nsStandardURL::GetIDNService();
  if (!idn) {
    return NS_ERROR_NOT_AVAILABLE;
  }
  // ConvertToDisplayIDN reports a boolean through its second argument; this
  // wrapper does not expose it. Initialised so it never holds an
  // indeterminate value.
  bool ignored = false;
  return idn->ConvertToDisplayIDN(aHost, &ignored, aDisplay);
}
/**
 * Converts an ACE host name to UTF-8 by forwarding to the IDN service's
 * ConvertACEtoUTF8.
 *
 * @param aHost     Host name to convert.
 * @param aUnicode  Receives the converted host name.
 * @return NS_ERROR_NOT_AVAILABLE if nsStandardURL has no IDN service to hand
 *         out; otherwise the result of ConvertACEtoUTF8.
 */
nsresult NS_DomainToUnicode(const nsACString& aHost, nsACString& aUnicode) {
  // The accessor returns a raw pointer and nothing here guarantees it is
  // non-null, so fail cleanly instead of dereferencing a null service.
  nsIIDNService* idn = nsStandardURL::GetIDNService();
  if (!idn) {
    return NS_ERROR_NOT_AVAILABLE;
  }
  return idn->ConvertACEtoUTF8(aHost, aUnicode);
}
nsresult NS_NewInputStreamChannelInternal(
nsIChannel** outChannel, nsIURI* aUri,
already_AddRefed<nsIInputStream> aStream, const nsACString& aContentType,

View file

@ -294,10 +294,28 @@ int32_t NS_GetDefaultPort(const char* scheme,
nsIIOService* ioService = nullptr);
/**
 * This function is a helper function to apply the ToAscii conversion
 * to a string.
 *
 * @param idn     the UTF-8 string to convert
 * @param result  receives the converted string
 * @return true if the conversion succeeded, false otherwise
 */
bool NS_StringToACE(const nsACString& idn, nsACString& result);
/**
 * The UTS #46 ToASCII operation as parametrized by the WHATWG URL Standard.
 *
 * Use this function to prepare a host name for network protocols.
 *
 * @param aHost   the host name to convert
 * @param aASCII  receives the converted host name
 * @return the nsresult reported by the conversion
 */
nsresult NS_DomainToASCII(const nsACString& aHost, nsACString& aASCII);
/**
 * The UTS #46 ToUnicode operation as parametrized by the WHATWG URL Standard,
 * except potentially misleading labels are treated according to ToASCII
 * instead.
 *
 * Use this function to prepare a host name for display to the user.
 *
 * @param aHost     the host name to convert
 * @param aDisplay  receives the display form of the host name
 * @return the nsresult reported by the conversion
 */
nsresult NS_DomainToDisplay(const nsACString& aHost, nsACString& aDisplay);
/**
 * The UTS #46 ToUnicode operation as parametrized by the WHATWG URL Standard.
 *
 * It's most likely incorrect to call this function, and `NS_DomainToDisplay`
 * should typically be called instead.
 *
 * @param aHost     the host name to convert
 * @param aUnicode  receives the converted host name
 * @return the nsresult reported by the conversion
 */
nsresult NS_DomainToUnicode(const nsACString& aHost, nsACString& aUnicode);
/**
* This function is a helper function to get a protocol's default port if the

View file

@ -622,6 +622,8 @@ nsresult nsStandardURL::NormalizeIPv4(const nsACString& host,
return NS_OK;
}
// Accessor for the IDN service kept in gIDN. The caller gets exactly the raw
// pointer that gIDN.get() yields; nothing is created or looked up here.
nsIIDNService* nsStandardURL::GetIDNService() {
  nsIIDNService* const service = gIDN.get();
  return service;
}
nsresult nsStandardURL::NormalizeIDN(const nsCString& host, nsCString& result) {
result.Truncate();
mDisplayHost.Truncate();

View file

@ -198,6 +198,8 @@ class nsStandardURL : public nsIFileURL,
static nsresult NormalizeIPv4(const nsACString& host, nsCString& result);
// Returns the raw IDN service pointer this class keeps in its implementation
// file. NOTE(review): presumably null until the class's globals have been set
// up -- confirm before relying on a non-null result.
static nsIIDNService* GetIDNService();
protected:
// enum used in a few places to specify how .ref attribute should be handled
enum RefHandlingEnum { eIgnoreRef, eHonorRef, eReplaceRef };

View file

@ -0,0 +1,77 @@
#include "gtest/gtest.h"
#include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH
#include "gtest/BlackBox.h"
#include "nsNetUtil.h"
/**
 * Fixture for the IDNA conversion benchmarks.
 *
 * Holds one sample host name per input category (plain ASCII, ASCII with a
 * leading digit, and Unicode/Punycode spellings of mixed, LTR and RTL names)
 * plus an empty string. SetUp() fills them in before each benchmark runs.
 */
class TestIDNA : public ::testing::Test {
 protected:
  void SetUp() override {
    // Intentionally Assign and not AssignLiteral
    // to simulate the usual heap case.
    mPlainASCII.Assign("example.com");
    mLeadingDigitASCII.Assign("1test.example");
    mUnicodeMixed.Assign("مثال.example");
    mPunycodeMixed.Assign("xn--mgbh0fb.example");
    mUnicodeLTR.Assign("නම.උදාහරණ");
    mPunycodeLTR.Assign("xn--r0co.xn--ozc8dl2c3bxd");
    mUnicodeRTL.Assign("الاسم.مثال");
    mPunycodeRTL.Assign("xn--mgba0b1dh.xn--mgbh0fb");
    // Intentionally not assigning to mEmpty
  }

 public:
  // Inputs read by the benchmark bodies.
  nsCString mPlainASCII;
  nsCString mLeadingDigitASCII;
  nsCString mUnicodeMixed;
  nsCString mPunycodeMixed;
  nsCString mUnicodeLTR;
  nsCString mPunycodeLTR;
  nsCString mUnicodeRTL;
  nsCString mPunycodeRTL;
  nsCString mEmpty;  // Extremely suspicious measurement!
};
// How many conversions each generated benchmark body performs.
#define IDNA_ITERATIONS 50000

// Declares a TestIDNA benchmark called `name` whose body calls
// `func(src, <fresh nsCString>)` IDNA_ITERATIONS times. `src` is expected to
// name a fixture member. Both arguments are routed through mozilla::BlackBox
// (presumably so the optimizer cannot see through the call -- confirm against
// BlackBox.h). The status returned by `func` is not inspected.
#define IDNA_BENCH(name, func, src)                                   \
  MOZ_GTEST_BENCH_F(TestIDNA, name, [this] {                          \
    for (int round = 0; round < IDNA_ITERATIONS; ++round) {           \
      nsCString output;                                               \
      func(*mozilla::BlackBox(&src), *mozilla::BlackBox(&output));    \
    }                                                                 \
  });
// One benchmark per (conversion function, fixture input) pair.

// NS_DomainToASCII over every fixture input.
IDNA_BENCH(BenchToASCIIPlainASCII, NS_DomainToASCII, mPlainASCII);
IDNA_BENCH(BenchToASCIILeadingDigitASCII, NS_DomainToASCII, mLeadingDigitASCII);
IDNA_BENCH(BenchToASCIIUnicodeMixed, NS_DomainToASCII, mUnicodeMixed);
IDNA_BENCH(BenchToASCIIPunycodeMixed, NS_DomainToASCII, mPunycodeMixed);
IDNA_BENCH(BenchToASCIIUnicodeLTR, NS_DomainToASCII, mUnicodeLTR);
IDNA_BENCH(BenchToASCIIPunycodeLTR, NS_DomainToASCII, mPunycodeLTR);
IDNA_BENCH(BenchToASCIIUnicodeRTL, NS_DomainToASCII, mUnicodeRTL);
IDNA_BENCH(BenchToASCIIPunycodeRTL, NS_DomainToASCII, mPunycodeRTL);
IDNA_BENCH(BenchToASCIIEmpty, NS_DomainToASCII, mEmpty);
// NS_DomainToDisplay over every fixture input.
IDNA_BENCH(BenchToDisplayPlainASCII, NS_DomainToDisplay, mPlainASCII);
IDNA_BENCH(BenchToDisplayLeadingDigitASCII, NS_DomainToDisplay,
mLeadingDigitASCII);
IDNA_BENCH(BenchToDisplayUnicodeMixed, NS_DomainToDisplay, mUnicodeMixed);
IDNA_BENCH(BenchToDisplayPunycodeMixed, NS_DomainToDisplay, mPunycodeMixed);
IDNA_BENCH(BenchToDisplayUnicodeLTR, NS_DomainToDisplay, mUnicodeLTR);
IDNA_BENCH(BenchToDisplayPunycodeLTR, NS_DomainToDisplay, mPunycodeLTR);
IDNA_BENCH(BenchToDisplayUnicodeRTL, NS_DomainToDisplay, mUnicodeRTL);
IDNA_BENCH(BenchToDisplayPunycodeRTL, NS_DomainToDisplay, mPunycodeRTL);
IDNA_BENCH(BenchToDisplayEmpty, NS_DomainToDisplay, mEmpty);
// NS_DomainToUnicode over every fixture input.
IDNA_BENCH(BenchToUnicodePlainASCII, NS_DomainToUnicode, mPlainASCII);
IDNA_BENCH(BenchToUnicodeLeadingDigitASCII, NS_DomainToUnicode,
mLeadingDigitASCII);
IDNA_BENCH(BenchToUnicodeUnicodeMixed, NS_DomainToUnicode, mUnicodeMixed);
IDNA_BENCH(BenchToUnicodePunycodeMixed, NS_DomainToUnicode, mPunycodeMixed);
IDNA_BENCH(BenchToUnicodeUnicodeLTR, NS_DomainToUnicode, mUnicodeLTR);
IDNA_BENCH(BenchToUnicodePunycodeLTR, NS_DomainToUnicode, mPunycodeLTR);
IDNA_BENCH(BenchToUnicodeUnicodeRTL, NS_DomainToUnicode, mUnicodeRTL);
IDNA_BENCH(BenchToUnicodePunycodeRTL, NS_DomainToUnicode, mPunycodeRTL);
IDNA_BENCH(BenchToUnicodeEmpty, NS_DomainToUnicode, mEmpty);

View file

@ -17,6 +17,7 @@ UNIFIED_SOURCES += [
"TestHttpAuthUtils.cpp",
"TestHttpChannel.cpp",
"TestHttpResponseHead.cpp",
"TestIDNA.cpp",
"TestInputStreamTransport.cpp",
"TestIsValidIp.cpp",
"TestLinkHeader.cpp",