Bug 741776 - Treat JSON, WebVTT and AppCache manifests as UTF-8 when loaded as plain text. r=Ehsan

MozReview-Commit-ID: 5UvYqJVvX0r

--HG--
extra : rebase_source : 5a6f3dfd97fb06810fde9a4b8b650a7a922a7c20
This commit is contained in:
Henri Sivonen 2016-06-09 14:29:30 +03:00
parent a4563a219e
commit a36fff43c5
9 changed files with 39 additions and 6 deletions

View file

@ -3757,6 +3757,16 @@ nsContentUtils::IsPlainTextType(const nsACString& aContentType)
IsScriptType(aContentType); IsScriptType(aContentType);
} }
bool
nsContentUtils::IsUtf8OnlyPlainTextType(const nsACString& aContentType)
{
  // Reports whether aContentType is one of the plain-text-rendered types
  // that do not support non-UTF-8 encodings.
  //
  // NOTE: Every type accepted here must also be accepted by
  // IsPlainTextType(); keep this list a subset of that one.
  if (aContentType.EqualsLiteral(TEXT_CACHE_MANIFEST)) {
    return true;
  }
  if (aContentType.EqualsLiteral(APPLICATION_JSON)) {
    return true;
  }
  if (aContentType.EqualsLiteral(TEXT_JSON)) {
    return true;
  }
  return aContentType.EqualsLiteral(TEXT_VTT);
}
bool bool
nsContentUtils::GetWrapperSafeScriptFilename(nsIDocument* aDocument, nsContentUtils::GetWrapperSafeScriptFilename(nsIDocument* aDocument,
nsIURI* aURI, nsIURI* aURI,

View file

@ -1026,15 +1026,21 @@ public:
static bool IsChildOfSameType(nsIDocument* aDoc); static bool IsChildOfSameType(nsIDocument* aDoc);
/** /**
* Returns true if the content-type is any of the supported script types. * Returns true if the content-type is any of the supported script types.
*/ */
static bool IsScriptType(const nsACString& aContentType); static bool IsScriptType(const nsACString& aContentType);
/** /**
* Returns true if the content-type will be rendered as plain-text. * Returns true if the content-type will be rendered as plain-text.
*/ */
static bool IsPlainTextType(const nsACString& aContentType); static bool IsPlainTextType(const nsACString& aContentType);
/**
* Returns true iff the type is rendered as plain text and doesn't support
* non-UTF-8 encodings.
*
* The definition accepts TEXT_CACHE_MANIFEST, APPLICATION_JSON, TEXT_JSON
* and TEXT_VTT. Its list of types must remain a subset of the types
* accepted by IsPlainTextType().
*
* @param aContentType the content type to test.
*/
static bool IsUtf8OnlyPlainTextType(const nsACString& aContentType);
/** /**
* Get the script file name to use when compiling the script * Get the script file name to use when compiling the script
* referenced by aURI. In cases where there's no need for any extra * referenced by aURI. In cases where there's no need for any extra

View file

@ -550,6 +550,9 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
return NS_ERROR_INVALID_ARG; return NS_ERROR_INVALID_ARG;
} }
bool forceUtf8 = plainText &&
nsContentUtils::IsUtf8OnlyPlainTextType(contentType);
bool loadAsHtml5 = true; bool loadAsHtml5 = true;
if (!viewSource && xhtml) { if (!viewSource && xhtml) {
@ -669,7 +672,12 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
} }
} }
if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR if (forceUtf8) {
charsetSource = kCharsetFromUtf8OnlyMime;
charset.AssignLiteral("UTF-8");
parserCharsetSource = charsetSource;
parserCharset = charset;
} else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
charsetSource = IsHTMLDocument() ? kCharsetFromFallback charsetSource = IsHTMLDocument() ? kCharsetFromFallback
: kCharsetFromDocTypeDefault; : kCharsetFromDocTypeDefault;
charset.AssignLiteral("UTF-8"); charset.AssignLiteral("UTF-8");
@ -3618,7 +3626,7 @@ nsHTMLDocument::WillIgnoreCharsetOverride()
MOZ_ASSERT(mType == eXHTML); MOZ_ASSERT(mType == eXHTML);
return true; return true;
} }
if (mCharacterSetSource == kCharsetFromByteOrderMark) { if (mCharacterSetSource >= kCharsetFromByteOrderMark) {
return true; return true;
} }
if (!EncodingUtils::IsAsciiCompatible(mCharacterSet)) { if (!EncodingUtils::IsAsciiCompatible(mCharacterSet)) {

View file

@ -0,0 +1 @@
<meta charset=utf-8><pre>ää

View file

@ -0,0 +1 @@
ää

View file

@ -27,6 +27,7 @@ include toblob-todataurl/reftest.list
== 610935.html 610935-ref.html == 610935.html 610935-ref.html
== 649134-1.html 649134-ref.html == 649134-1.html 649134-ref.html
skip-if(Android) == 649134-2.html 649134-2-ref.html skip-if(Android) == 649134-2.html 649134-2-ref.html
== 741776-1.vtt 741776-1-ref.html
== bug448564-1_malformed.html bug448564-1_well-formed.html == bug448564-1_malformed.html bug448564-1_well-formed.html
== bug448564-1_malformed.html bug448564-1_ideal.html == bug448564-1_malformed.html bug448564-1_ideal.html

View file

@ -981,13 +981,15 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
} }
nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest)); nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
if (!wyciwygChannel) { if (mCharsetSource < kCharsetFromUtf8OnlyMime && !wyciwygChannel) {
// we aren't ready to commit to an encoding yet // we aren't ready to commit to an encoding yet
// leave converter uninstantiated for now // leave converter uninstantiated for now
return NS_OK; return NS_OK;
} }
// We are reloading a document.open()ed doc. // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
// a browsing context. In the latter case, there's no need to remove the
// BOM manually here, because the UTF-8 decoder removes it.
mReparseForbidden = true; mReparseForbidden = true;
mFeedChardet = false; mFeedChardet = false;

View file

@ -22,5 +22,6 @@
#define kCharsetFromParentForced 13 // propagates to child frames #define kCharsetFromParentForced 13 // propagates to child frames
#define kCharsetFromUserForced 14 // propagates to child frames #define kCharsetFromUserForced 14 // propagates to child frames
#define kCharsetFromByteOrderMark 15 #define kCharsetFromByteOrderMark 15
#define kCharsetFromUtf8OnlyMime 16 // For JSON, WebVTT and such
#endif /* nsCharsetSource_h_ */ #endif /* nsCharsetSource_h_ */

View file

@ -591,6 +591,9 @@ static const nsExtraMimeTypeEntry extraMimeEntries[] =
{ IMAGE_SVG_XML, "svg", "Scalable Vector Graphics" }, { IMAGE_SVG_XML, "svg", "Scalable Vector Graphics" },
{ MESSAGE_RFC822, "eml", "RFC-822 data" }, { MESSAGE_RFC822, "eml", "RFC-822 data" },
{ TEXT_PLAIN, "txt,text", "Text File" }, { TEXT_PLAIN, "txt,text", "Text File" },
{ APPLICATION_JSON, "json", "JavaScript Object Notation" },
{ TEXT_VTT, "vtt", "Web Video Text Tracks" },
{ TEXT_CACHE_MANIFEST, "appcache", "Application Cache Manifest" },
{ TEXT_HTML, "html,htm,shtml,ehtml", "HyperText Markup Language" }, { TEXT_HTML, "html,htm,shtml,ehtml", "HyperText Markup Language" },
{ "application/xhtml+xml", "xhtml,xht", "Extensible HyperText Markup Language" }, { "application/xhtml+xml", "xhtml,xht", "Extensible HyperText Markup Language" },
{ APPLICATION_MATHML_XML, "mml", "Mathematical Markup Language" }, { APPLICATION_MATHML_XML, "mml", "Mathematical Markup Language" },