forked from mirrors/gecko-dev
Bug 1552008 - Track column number in the HTML. r=smaug,nchevobbe
Differential Revision: https://phabricator.services.mozilla.com/D170579
This commit is contained in:
parent
b1ca540f9f
commit
44b22460e5
8 changed files with 89 additions and 18 deletions
|
|
@ -1390,6 +1390,9 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
public void start() throws SAXException {
|
||||
initializeWithoutStarting();
|
||||
tokenHandler.startTokenization(this);
|
||||
// CPPONLY: line = 0;
|
||||
// CPPONLY: col = 1;
|
||||
// CPPONLY: nextCharOnNewLine = true;
|
||||
// [NOCPP[
|
||||
startErrorReporting();
|
||||
// ]NOCPP]
|
||||
|
|
@ -6328,6 +6331,8 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
appendStrBuf('\n');
|
||||
}
|
||||
|
||||
// [NOCPP[
|
||||
|
||||
@Inline protected void silentCarriageReturn() {
|
||||
++line;
|
||||
lastCR = true;
|
||||
|
|
@ -6337,6 +6342,8 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
++line;
|
||||
}
|
||||
|
||||
// ]NOCPP]
|
||||
|
||||
private void emitCarriageReturn(@NoLength char[] buf, int pos)
|
||||
throws SAXException {
|
||||
silentCarriageReturn();
|
||||
|
|
@ -7154,11 +7161,15 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
return suspendAfterCurrentNonTextToken;
|
||||
}
|
||||
|
||||
// [NOCPP[
|
||||
|
||||
@Inline protected char checkChar(@NoLength char[] buf, int pos)
|
||||
throws SAXException {
|
||||
return buf[pos];
|
||||
}
|
||||
|
||||
// ]NOCPP]
|
||||
|
||||
public boolean internalEncodingDeclaration(String internalCharset)
|
||||
throws SAXException {
|
||||
if (encodingDeclarationHandler != null) {
|
||||
|
|
|
|||
|
|
@ -8,10 +8,12 @@ using namespace mozilla;
|
|||
|
||||
nsHtml5Speculation::nsHtml5Speculation(nsHtml5OwningUTF16Buffer* aBuffer,
|
||||
int32_t aStart, int32_t aStartLineNumber,
|
||||
int32_t aStartColumnNumber,
|
||||
nsAHtml5TreeBuilderState* aSnapshot)
|
||||
: mBuffer(aBuffer),
|
||||
mStart(aStart),
|
||||
mStartLineNumber(aStartLineNumber),
|
||||
mStartColumnNumber(aStartColumnNumber),
|
||||
mSnapshot(aSnapshot) {
|
||||
MOZ_COUNT_CTOR(nsHtml5Speculation);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
class nsHtml5Speculation final : public nsAHtml5TreeOpSink {
|
||||
public:
|
||||
nsHtml5Speculation(nsHtml5OwningUTF16Buffer* aBuffer, int32_t aStart,
|
||||
int32_t aStartLineNumber,
|
||||
int32_t aStartLineNumber, int32_t aStartColumnNumber,
|
||||
nsAHtml5TreeBuilderState* aSnapshot);
|
||||
|
||||
~nsHtml5Speculation();
|
||||
|
|
@ -27,6 +27,8 @@ class nsHtml5Speculation final : public nsAHtml5TreeOpSink {
|
|||
|
||||
int32_t GetStartLineNumber() { return mStartLineNumber; }
|
||||
|
||||
int32_t GetStartColumnNumber() { return mStartColumnNumber; }
|
||||
|
||||
nsAHtml5TreeBuilderState* GetSnapshot() { return mSnapshot.get(); }
|
||||
|
||||
/**
|
||||
|
|
@ -54,6 +56,11 @@ class nsHtml5Speculation final : public nsAHtml5TreeOpSink {
|
|||
*/
|
||||
int32_t mStartLineNumber;
|
||||
|
||||
/**
|
||||
* The current column number at the start of the speculation.
|
||||
*/
|
||||
int32_t mStartColumnNumber;
|
||||
|
||||
mozilla::UniquePtr<nsAHtml5TreeBuilderState> mSnapshot;
|
||||
|
||||
nsTArray<nsHtml5TreeOperation> mOpQueue;
|
||||
|
|
|
|||
|
|
@ -791,7 +791,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(Span<const uint8_t> aFromSegment,
|
|||
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
|
||||
nsHtml5Speculation* speculation = new nsHtml5Speculation(
|
||||
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
|
||||
mTreeBuilder->newSnapshot());
|
||||
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
|
||||
MOZ_ASSERT(!mFlushTimerArmed, "How did we end up arming the timer?");
|
||||
if (mMode == VIEW_SOURCE_HTML) {
|
||||
mTokenizer->SetViewSourceOpSink(speculation);
|
||||
|
|
@ -1999,7 +1999,7 @@ void nsHtml5StreamParser::DiscardMetaSpeculation() {
|
|||
|
||||
nsHtml5Speculation* speculation = new nsHtml5Speculation(
|
||||
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
|
||||
mTreeBuilder->newSnapshot());
|
||||
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
|
||||
MOZ_ASSERT(!mFlushTimerArmed, "How did we end up arming the timer?");
|
||||
if (mMode == VIEW_SOURCE_HTML) {
|
||||
mTokenizer->SetViewSourceOpSink(speculation);
|
||||
|
|
@ -2486,7 +2486,7 @@ void nsHtml5StreamParser::ParseAvailableData() {
|
|||
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
|
||||
nsHtml5Speculation* speculation = new nsHtml5Speculation(
|
||||
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
|
||||
mTreeBuilder->newSnapshot());
|
||||
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
|
||||
mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(),
|
||||
speculation->GetStartLineNumber());
|
||||
if (mLookingForMetaCharset) {
|
||||
|
|
@ -2649,12 +2649,15 @@ void nsHtml5StreamParser::ContinueAfterScriptsOrEncodingCommitment(
|
|||
mFirstBuffer = speculation->GetBuffer();
|
||||
mFirstBuffer->setStart(speculation->GetStart());
|
||||
mTokenizer->setLineNumber(speculation->GetStartLineNumber());
|
||||
mTokenizer->setColumnNumberAndResetNextLine(
|
||||
speculation->GetStartColumnNumber());
|
||||
|
||||
nsContentUtils::ReportToConsole(
|
||||
nsIScriptError::warningFlag, "DOM Events"_ns,
|
||||
mExecutor->GetDocument(), nsContentUtils::eDOM_PROPERTIES,
|
||||
"SpeculationFailed2", nsTArray<nsString>(), nullptr, u""_ns,
|
||||
speculation->GetStartLineNumber());
|
||||
speculation->GetStartLineNumber(),
|
||||
speculation->GetStartColumnNumber());
|
||||
|
||||
nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
|
||||
while (buffer) {
|
||||
|
|
|
|||
|
|
@ -400,6 +400,9 @@ void nsHtml5Tokenizer::addAttributeWithValue() {
|
|||
void nsHtml5Tokenizer::start() {
|
||||
initializeWithoutStarting();
|
||||
tokenHandler->startTokenization(this);
|
||||
line = 0;
|
||||
col = 1;
|
||||
nextCharOnNewLine = true;
|
||||
}
|
||||
|
||||
bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) {
|
||||
|
|
|
|||
|
|
@ -429,15 +429,6 @@ class nsHtml5Tokenizer {
|
|||
appendStrBuf('\n');
|
||||
}
|
||||
|
||||
protected:
|
||||
inline void silentCarriageReturn() {
|
||||
++line;
|
||||
lastCR = true;
|
||||
}
|
||||
|
||||
inline void silentLineFeed() { ++line; }
|
||||
|
||||
private:
|
||||
void emitCarriageReturn(char16_t* buf, int32_t pos);
|
||||
void emitReplacementCharacter(char16_t* buf, int32_t pos);
|
||||
void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);
|
||||
|
|
@ -456,9 +447,6 @@ class nsHtml5Tokenizer {
|
|||
void suspendAfterCurrentTokenIfNotInText();
|
||||
bool suspensionAfterCurrentNonTextTokenPending();
|
||||
|
||||
protected:
|
||||
inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; }
|
||||
|
||||
public:
|
||||
bool internalEncodingDeclaration(nsHtml5String internalCharset);
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,63 @@
|
|||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
private:
|
||||
inline void silentCarriageReturn() {
|
||||
nextCharOnNewLine = true;
|
||||
lastCR = true;
|
||||
}
|
||||
|
||||
inline void silentLineFeed() { nextCharOnNewLine = true; }
|
||||
|
||||
inline char16_t checkChar(char16_t* buf, int32_t pos) {
|
||||
// The name of this method comes from the validator.
|
||||
// We aren't checking a char here. We read the next
|
||||
// UTF-16 code unit and, before returning it, adjust
|
||||
// the line and column numbers.
|
||||
char16_t c = buf[pos];
|
||||
if (MOZ_UNLIKELY(nextCharOnNewLine)) {
|
||||
// Changing the line and column here instead
|
||||
// of doing so eagerly when seeing the line break
|
||||
// causes the line break itself to be considered
|
||||
// column-wise at the end of a line.
|
||||
line++;
|
||||
col = 1;
|
||||
nextCharOnNewLine = false;
|
||||
} else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
|
||||
// SpiderMonkey wants to count scalar values
|
||||
// instead of UTF-16 code units. We omit low
|
||||
// surrogates from the count so that only the
|
||||
// high surrogate increments the count for
|
||||
// two-code-unit scalar values.
|
||||
//
|
||||
// It's somewhat questionable from the performance
|
||||
// perspective to make the human-perceivable column
|
||||
// count correct for non-BMP characters in the case
|
||||
// where there is a single scalar value per extended
|
||||
// grapheme cluster when even on the BMP there are
|
||||
// various cases where the scalar count doesn't make
|
||||
// much sense as a human-perceived "column count" due
|
||||
// to extended grapheme clusters consisting of more
|
||||
// than one scalar value.
|
||||
col++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
int32_t col;
|
||||
bool nextCharOnNewLine;
|
||||
|
||||
public:
|
||||
inline int32_t getColumnNumber() { return col; }
|
||||
|
||||
inline void setColumnNumberAndResetNextLine(int32_t aCol) {
|
||||
col = aCol;
|
||||
// The restored position only ever points to the position of
|
||||
// script tag's > character, so we can unconditionally use
|
||||
// `false` below.
|
||||
nextCharOnNewLine = false;
|
||||
}
|
||||
|
||||
inline nsHtml5HtmlAttributes* GetAttributes() { return attributes; }
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ var expectedErrors = [
|
|||
isWarning: true },
|
||||
{ errorMessage: "The start of the document was reparsed, because there were non-ASCII characters in the part of the document that was unsuccessfully searched for a meta tag before falling back to the XML declaration syntax. A meta tag at the start of the head part should be used instead of the XML declaration syntax.",
|
||||
sourceName: "http://mochi.test:8888/tests/parser/htmlparser/tests/mochitest/file_bug672453_xml_speculation_fail.html",
|
||||
lineNumber: 11,
|
||||
lineNumber: 10,
|
||||
isWarning: true },
|
||||
];
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue