Bug 1552008 - Track column number in the HTML. r=smaug,nchevobbe

Differential Revision: https://phabricator.services.mozilla.com/D170579
This commit is contained in:
Henri Sivonen 2023-03-16 16:44:17 +00:00
parent b1ca540f9f
commit 44b22460e5
8 changed files with 89 additions and 18 deletions

View file

@ -1390,6 +1390,9 @@ public class Tokenizer implements Locator, Locator2 {
public void start() throws SAXException {
initializeWithoutStarting();
tokenHandler.startTokenization(this);
// CPPONLY: line = 0;
// CPPONLY: col = 1;
// CPPONLY: nextCharOnNewLine = true;
// [NOCPP[
startErrorReporting();
// ]NOCPP]
@ -6328,6 +6331,8 @@ public class Tokenizer implements Locator, Locator2 {
appendStrBuf('\n');
}
// [NOCPP[
@Inline protected void silentCarriageReturn() {
++line;
lastCR = true;
@ -6337,6 +6342,8 @@ public class Tokenizer implements Locator, Locator2 {
++line;
}
// ]NOCPP]
private void emitCarriageReturn(@NoLength char[] buf, int pos)
throws SAXException {
silentCarriageReturn();
@ -7154,11 +7161,15 @@ public class Tokenizer implements Locator, Locator2 {
return suspendAfterCurrentNonTextToken;
}
// [NOCPP[
/**
 * Returns the <code>char</code> stored at index <code>pos</code> of
 * <code>buf</code>. This version performs a plain array read and does
 * nothing else.
 *
 * @param buf the buffer to read from
 * @param pos the index of the <code>char</code> to read
 * @return <code>buf[pos]</code>
 */
@Inline protected char checkChar(@NoLength char[] buf, int pos)
throws SAXException {
return buf[pos];
}
// ]NOCPP]
public boolean internalEncodingDeclaration(String internalCharset)
throws SAXException {
if (encodingDeclarationHandler != null) {

View file

@ -8,10 +8,12 @@ using namespace mozilla;
// Records where a speculation starts: the buffer, the start offset within
// that buffer, the line and column numbers at the start of the speculation,
// and a tree builder state snapshot. Each argument is stored in the
// like-named member. mSnapshot is a mozilla::UniquePtr, so this object
// takes ownership of aSnapshot.
nsHtml5Speculation::nsHtml5Speculation(nsHtml5OwningUTF16Buffer* aBuffer,
int32_t aStart, int32_t aStartLineNumber,
int32_t aStartColumnNumber,
nsAHtml5TreeBuilderState* aSnapshot)
: mBuffer(aBuffer),
mStart(aStart),
mStartLineNumber(aStartLineNumber),
mStartColumnNumber(aStartColumnNumber),
mSnapshot(aSnapshot) {
MOZ_COUNT_CTOR(nsHtml5Speculation);
}

View file

@ -16,7 +16,7 @@
class nsHtml5Speculation final : public nsAHtml5TreeOpSink {
public:
nsHtml5Speculation(nsHtml5OwningUTF16Buffer* aBuffer, int32_t aStart,
int32_t aStartLineNumber,
int32_t aStartLineNumber, int32_t aStartColumnNumber,
nsAHtml5TreeBuilderState* aSnapshot);
~nsHtml5Speculation();
@ -27,6 +27,8 @@ class nsHtml5Speculation final : public nsAHtml5TreeOpSink {
// Returns the line number that was passed to the constructor as
// aStartLineNumber.
int32_t GetStartLineNumber() { return mStartLineNumber; }
// Returns the column number that was passed to the constructor as
// aStartColumnNumber.
int32_t GetStartColumnNumber() { return mStartColumnNumber; }
// Returns a non-owning pointer to the snapshot; ownership stays with
// mSnapshot.
nsAHtml5TreeBuilderState* GetSnapshot() { return mSnapshot.get(); }
/**
@ -54,6 +56,11 @@ class nsHtml5Speculation final : public nsAHtml5TreeOpSink {
*/
int32_t mStartLineNumber;
/**
* The current column number at the start of the speculation.
*/
int32_t mStartColumnNumber;
mozilla::UniquePtr<nsAHtml5TreeBuilderState> mSnapshot;
nsTArray<nsHtml5TreeOperation> mOpQueue;

View file

@ -791,7 +791,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(Span<const uint8_t> aFromSegment,
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
nsHtml5Speculation* speculation = new nsHtml5Speculation(
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
mTreeBuilder->newSnapshot());
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
MOZ_ASSERT(!mFlushTimerArmed, "How did we end up arming the timer?");
if (mMode == VIEW_SOURCE_HTML) {
mTokenizer->SetViewSourceOpSink(speculation);
@ -1999,7 +1999,7 @@ void nsHtml5StreamParser::DiscardMetaSpeculation() {
nsHtml5Speculation* speculation = new nsHtml5Speculation(
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
mTreeBuilder->newSnapshot());
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
MOZ_ASSERT(!mFlushTimerArmed, "How did we end up arming the timer?");
if (mMode == VIEW_SOURCE_HTML) {
mTokenizer->SetViewSourceOpSink(speculation);
@ -2486,7 +2486,7 @@ void nsHtml5StreamParser::ParseAvailableData() {
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
nsHtml5Speculation* speculation = new nsHtml5Speculation(
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
mTreeBuilder->newSnapshot());
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(),
speculation->GetStartLineNumber());
if (mLookingForMetaCharset) {
@ -2649,12 +2649,15 @@ void nsHtml5StreamParser::ContinueAfterScriptsOrEncodingCommitment(
mFirstBuffer = speculation->GetBuffer();
mFirstBuffer->setStart(speculation->GetStart());
mTokenizer->setLineNumber(speculation->GetStartLineNumber());
mTokenizer->setColumnNumberAndResetNextLine(
speculation->GetStartColumnNumber());
nsContentUtils::ReportToConsole(
nsIScriptError::warningFlag, "DOM Events"_ns,
mExecutor->GetDocument(), nsContentUtils::eDOM_PROPERTIES,
"SpeculationFailed2", nsTArray<nsString>(), nullptr, u""_ns,
speculation->GetStartLineNumber());
speculation->GetStartLineNumber(),
speculation->GetStartColumnNumber());
nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
while (buffer) {

View file

@ -400,6 +400,9 @@ void nsHtml5Tokenizer::addAttributeWithValue() {
// Initializes the tokenizer, notifies the token handler that tokenization
// is starting, and resets the line/column bookkeeping.
void nsHtml5Tokenizer::start() {
initializeWithoutStarting();
tokenHandler->startTokenization(this);
// line starts at 0 with nextCharOnNewLine set, so the first checkChar()
// call advances line to 1 and sets col to 1.
line = 0;
col = 1;
nextCharOnNewLine = true;
}
bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) {

View file

@ -429,15 +429,6 @@ class nsHtml5Tokenizer {
appendStrBuf('\n');
}
protected:
// Accounts for a carriage return: increments the line counter right away
// and sets lastCR.
inline void silentCarriageReturn() {
++line;
lastCR = true;
}
// Accounts for a line feed by incrementing the line counter right away.
inline void silentLineFeed() { ++line; }
private:
void emitCarriageReturn(char16_t* buf, int32_t pos);
void emitReplacementCharacter(char16_t* buf, int32_t pos);
void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);
@ -456,9 +447,6 @@ class nsHtml5Tokenizer {
void suspendAfterCurrentTokenIfNotInText();
bool suspensionAfterCurrentNonTextTokenPending();
protected:
// Returns the code unit at buf[pos]; this version does no line or column
// bookkeeping.
inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; }
public:
bool internalEncodingDeclaration(nsHtml5String internalCharset);

View file

@ -2,6 +2,63 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
private:
// Notes that a carriage return was seen. The line counter is not
// touched here; nextCharOnNewLine is set so that checkChar() advances
// the line when the next code unit is read.
inline void silentCarriageReturn() {
  lastCR = true;
  nextCharOnNewLine = true;
}
// Notes that a line feed was seen. The line counter is not incremented
// here; checkChar() does that when it reads the next code unit.
inline void silentLineFeed() { nextCharOnNewLine = true; }
// Reads the UTF-16 code unit at buf[pos], updates the line and column
// counters to account for it, and returns it.
inline char16_t checkChar(char16_t* buf, int32_t pos) {
  // The method name is inherited from the validator. Nothing is
  // actually checked here: the next UTF-16 code unit is read and the
  // line/column bookkeeping is adjusted before the unit is returned.
  const char16_t codeUnit = buf[pos];

  if (MOZ_UNLIKELY(nextCharOnNewLine)) {
    // A line break was seen earlier. Moving to the new line lazily,
    // here, rather than eagerly when the line break was seen, makes
    // the line break itself count column-wise as the end of its line.
    ++line;
    col = 1;
    nextCharOnNewLine = false;
    return codeUnit;
  }

  if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(codeUnit))) {
    // SpiderMonkey wants columns counted in scalar values rather than
    // UTF-16 code units. Low surrogates are left out of the count so
    // that a two-code-unit scalar value advances the column once, on
    // its high surrogate.
    //
    // Whether this is worth the cost is debatable from a performance
    // point of view: it makes the human-perceivable column count
    // correct for non-BMP characters only when an extended grapheme
    // cluster is a single scalar value, while even on the BMP there
    // are various cases where the scalar count is a poor match for a
    // human-perceived "column count" because an extended grapheme
    // cluster consists of more than one scalar value.
    ++col;
  }
  return codeUnit;
}
int32_t col;
bool nextCharOnNewLine;
public:
// Returns the current value of the column counter maintained by
// checkChar().
inline int32_t getColumnNumber() { return col; }
// Restores the column counter to aCol and clears the pending
// "next code unit starts a new line" state.
inline void setColumnNumberAndResetNextLine(int32_t aCol) {
  // A restored position always refers to the > character of a script
  // tag, so there is never a pending line break to carry over and the
  // flag can be cleared unconditionally.
  nextCharOnNewLine = false;
  col = aCol;
}
// Returns the tokenizer's current `attributes` pointer as-is.
inline nsHtml5HtmlAttributes* GetAttributes() { return attributes; }
/**

View file

@ -104,7 +104,7 @@ var expectedErrors = [
isWarning: true },
{ errorMessage: "The start of the document was reparsed, because there were non-ASCII characters in the part of the document that was unsuccessfully searched for a meta tag before falling back to the XML declaration syntax. A meta tag at the start of the head part should be used instead of the XML declaration syntax.",
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_xml_speculation_fail.html",
lineNumber: 11,
lineNumber: 10,
isWarning: true },
];