Bug 741776 - Treat JSON, WebVTT and AppCache manifests as UTF-8 when loaded as plain text. r=Ehsan

MozReview-Commit-ID: 5UvYqJVvX0r --HG-- extra : rebase_source : 5a6f3dfd97fb06810fde9a4b8b650a7a922a7c20
2016-06-09 14:29:30 +03:00 · 2016-06-09 14:29:30 +03:00 · a36fff43c5
commit a36fff43c5
parent a4563a219e
9 changed files with 39 additions and 6 deletions
--- a/dom/base/nsContentUtils.cpp
+++ b/dom/base/nsContentUtils.cpp
@ -3757,6 +3757,16 @@ nsContentUtils::IsPlainTextType(const nsACString& aContentType)
         IsScriptType(aContentType);
 }
 bool
 nsContentUtils::IsUtf8OnlyPlainTextType(const nsACString& aContentType)
 {
   // Reports whether aContentType is one of the plain-text-rendered types that
   // do not support non-UTF-8 encodings.
   // NOTE: Every type accepted here must also be accepted by
   // IsPlainTextType(); this list has to stay a subset of that one.

   // JSON, under either of its MIME types.
   if (aContentType.EqualsLiteral(APPLICATION_JSON) ||
       aContentType.EqualsLiteral(TEXT_JSON)) {
     return true;
   }

   // WebVTT.
   if (aContentType.EqualsLiteral(TEXT_VTT)) {
     return true;
   }

   // AppCache manifest is the only remaining candidate.
   return aContentType.EqualsLiteral(TEXT_CACHE_MANIFEST);
 }
 bool
 nsContentUtils::GetWrapperSafeScriptFilename(nsIDocument* aDocument,
                                             nsIURI* aURI,
--- a/dom/base/nsContentUtils.h
+++ b/dom/base/nsContentUtils.h
@ -1026,15 +1026,21 @@ public:
  static bool IsChildOfSameType(nsIDocument* aDoc);
  /**
-  '* Returns true if the content-type is any of the supported script types.
+   * Returns true if the content-type is any of the supported script types.
   */
  static bool IsScriptType(const nsACString& aContentType);
  /**
-  '* Returns true if the content-type will be rendered as plain-text.
+   * Returns true if the content-type will be rendered as plain-text.
   */
  static bool IsPlainTextType(const nsACString& aContentType);
  /**
   * Returns true iff the type is rendered as plain text and doesn't support
   * non-UTF-8 encodings. The types recognized are the AppCache manifest,
   * JSON (application/json and text/json) and WebVTT types.
   *
   * The set of types accepted here must remain a subset of the types
   * accepted by IsPlainTextType().
   */
  static bool IsUtf8OnlyPlainTextType(const nsACString& aContentType);
  /**
   * Get the script file name to use when compiling the script
   * referenced by aURI. In cases where there's no need for any extra
--- a/dom/html/nsHTMLDocument.cpp
+++ b/dom/html/nsHTMLDocument.cpp
@ -550,6 +550,9 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
    return NS_ERROR_INVALID_ARG;
  }
  bool forceUtf8 = plainText &&
    nsContentUtils::IsUtf8OnlyPlainTextType(contentType);
  bool loadAsHtml5 = true;
  if (!viewSource && xhtml) {
@ -669,7 +672,12 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
    }
  }
-  if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
+  if (forceUtf8) {
    charsetSource = kCharsetFromUtf8OnlyMime;
    charset.AssignLiteral("UTF-8");
    parserCharsetSource = charsetSource;
    parserCharset = charset;
  } else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
    charsetSource = IsHTMLDocument() ? kCharsetFromFallback
                                     : kCharsetFromDocTypeDefault;
    charset.AssignLiteral("UTF-8");
@ -3618,7 +3626,7 @@ nsHTMLDocument::WillIgnoreCharsetOverride()
    MOZ_ASSERT(mType == eXHTML);
    return true;
  }
-  if (mCharacterSetSource == kCharsetFromByteOrderMark) {
+  if (mCharacterSetSource >= kCharsetFromByteOrderMark) {
    return true;
  }
  if (!EncodingUtils::IsAsciiCompatible(mCharacterSet)) {
--- a/dom/html/reftests/741776-1-ref.html
+++ b/dom/html/reftests/741776-1-ref.html
@ -0,0 +1 @@
 <meta charset=utf-8><pre>ää
--- a/dom/html/reftests/741776-1.vtt
+++ b/dom/html/reftests/741776-1.vtt
@ -0,0 +1 @@
 ää
--- a/dom/html/reftests/reftest.list
+++ b/dom/html/reftests/reftest.list
@ -27,6 +27,7 @@ include toblob-todataurl/reftest.list
 == 610935.html 610935-ref.html
 == 649134-1.html 649134-ref.html
 skip-if(Android) == 649134-2.html 649134-2-ref.html
 == 741776-1.vtt 741776-1-ref.html
 == bug448564-1_malformed.html bug448564-1_well-formed.html
 == bug448564-1_malformed.html bug448564-1_ideal.html
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@ -981,13 +981,15 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
  }
  nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
-  if (!wyciwygChannel) {
+  if (mCharsetSource < kCharsetFromUtf8OnlyMime && !wyciwygChannel) {
    // we aren't ready to commit to an encoding yet
    // leave converter uninstantiated for now
    return NS_OK;
  }
-  // We are reloading a document.open()ed doc.
+  // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
  // a browsing context. In the latter case, there's no need to remove the
  // BOM manually here, because the UTF-8 decoder removes it.
  mReparseForbidden = true;
  mFeedChardet = false;
--- a/parser/nsCharsetSource.h
+++ b/parser/nsCharsetSource.h
@ -22,5 +22,6 @@
 #define kCharsetFromParentForced       13 // propagates to child frames
 #define kCharsetFromUserForced         14 // propagates to child frames
 #define kCharsetFromByteOrderMark      15
 // The numeric order of these values is significant: callers compare charset
 // sources with ">= kCharsetFromByteOrderMark" and
 // "< kCharsetFromUtf8OnlyMime", so this value must stay above
 // kCharsetFromByteOrderMark.
 #define kCharsetFromUtf8OnlyMime       16 // For JSON, WebVTT and such
 #endif /* nsCharsetSource_h_ */
--- a/uriloader/exthandler/nsExternalHelperAppService.cpp
+++ b/uriloader/exthandler/nsExternalHelperAppService.cpp
@ -591,6 +591,9 @@ static const nsExtraMimeTypeEntry extraMimeEntries[] =
  { IMAGE_SVG_XML, "svg", "Scalable Vector Graphics" },
  { MESSAGE_RFC822, "eml", "RFC-822 data" },
  { TEXT_PLAIN, "txt,text", "Text File" },
  { APPLICATION_JSON, "json", "JavaScript Object Notation" },
  { TEXT_VTT, "vtt", "Web Video Text Tracks" },
  { TEXT_CACHE_MANIFEST, "appcache", "Application Cache Manifest" },
  { TEXT_HTML, "html,htm,shtml,ehtml", "HyperText Markup Language" },
  { "application/xhtml+xml", "xhtml,xht", "Extensible HyperText Markup Language" },
  { APPLICATION_MATHML_XML, "mml", "Mathematical Markup Language" },