Bug 1857221 - Avoid tracking the line and column number when parsing innerHTML. r=smaug

Differential Revision: https://phabricator.services.mozilla.com/D191270
This commit is contained in:
Henri Sivonen 2023-10-23 09:07:22 +00:00
parent 5e39eb1c49
commit fef29849ec
6 changed files with 358 additions and 262 deletions

View file

@ -1401,9 +1401,19 @@ public class Tokenizer implements Locator, Locator2 {
public void start() throws SAXException {
initializeWithoutStarting();
tokenHandler.startTokenization(this);
// CPPONLY: line = 0;
// CPPONLY: col = 1;
// CPPONLY: nextCharOnNewLine = true;
// CPPONLY: if (mViewSource) {
// CPPONLY: line = 1;
// CPPONLY: col = -1;
// CPPONLY: nextCharOnNewLine = false;
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
// CPPONLY: line = 0;
// CPPONLY: col = 1;
// CPPONLY: nextCharOnNewLine = true;
// CPPONLY: } else {
// CPPONLY: line = -1;
// CPPONLY: col = -1;
// CPPONLY: nextCharOnNewLine = false;
// CPPONLY: }
// [NOCPP[
startErrorReporting();
// ]NOCPP]
@ -1469,6 +1479,8 @@ public class Tokenizer implements Locator, Locator2 {
// CPPONLY: mViewSource.SetBuffer(buffer);
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: } else {
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: }
@ -6320,24 +6332,24 @@ public class Tokenizer implements Locator, Locator2 {
forceQuirks = false;
}
@Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
throws SAXException {
silentCarriageReturn();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
@Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
throws SAXException {
silentLineFeed();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
@Inline private void appendStrBufLineFeed() {
private void appendStrBufLineFeed() {
silentLineFeed();
appendStrBuf('\n');
}
@Inline private void appendStrBufCarriageReturn() {
private void appendStrBufCarriageReturn() {
silentCarriageReturn();
appendStrBuf('\n');
}

File diff suppressed because it is too large Load diff

View file

@ -43,7 +43,6 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5NamedCharactersAccel.h"
#include "nsHtml5String.h"
#include "nsHtml5TokenizerLoopPolicies.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
@ -409,26 +408,15 @@ class nsHtml5Tokenizer {
int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf,
bool reconsume, int32_t returnState, int32_t endPos);
void initDoctypeFields();
// Carriage-return variant: first records the CR through
// silentCarriageReturn(), then hands '\n' (not '\r') to
// adjustDoubleHyphenAndAppendToStrBufAndErr() with `false` as the second
// argument.
// NOTE(review): presumably this normalizes the CR to a line feed in the
// buffered text -- confirm against the helper.
inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() {
silentCarriageReturn();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
// Line-feed variant: first records the LF through silentLineFeed(), then
// hands '\n' to adjustDoubleHyphenAndAppendToStrBufAndErr() with `false` as
// the second argument.
inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() {
silentLineFeed();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
// Records the line feed through silentLineFeed() and then appends '\n' via
// appendStrBuf().
inline void appendStrBufLineFeed() {
silentLineFeed();
appendStrBuf('\n');
}
// Records the carriage return through silentCarriageReturn() and then
// appends '\n' (not '\r') via appendStrBuf().
// NOTE(review): presumably this normalizes the CR to a line feed in the
// buffered text -- confirm against appendStrBuf() callers.
inline void appendStrBufCarriageReturn() {
silentCarriageReturn();
appendStrBuf('\n');
}
template <class P>
void adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
template <class P>
void adjustDoubleHyphenAndAppendToStrBufLineFeed();
template <class P>
void appendStrBufLineFeed();
template <class P>
void appendStrBufCarriageReturn();
template <class P>
void emitCarriageReturn(char16_t* buf, int32_t pos);
void emitReplacementCharacter(char16_t* buf, int32_t pos);
void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);

View file

@ -2,49 +2,11 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
friend struct nsHtml5ViewSourcePolicy;
friend struct nsHtml5LineColPolicy;
friend struct nsHtml5FastestPolicy;
private:
// Notes that a carriage return was seen: raises the lastCR flag and marks
// the next code unit as the first one on a new line.
inline void silentCarriageReturn() {
  lastCR = true;
  nextCharOnNewLine = true;
}
// Notes that a line feed was seen: the next code unit read starts a new
// line.
inline void silentLineFeed() {
  nextCharOnNewLine = true;
}
// Despite the name (inherited from the validator), nothing is validated
// here. The method fetches the UTF-16 code unit at buf[pos] and updates the
// line/column bookkeeping before handing the code unit back.
inline char16_t checkChar(char16_t* buf, int32_t pos) {
  const char16_t unit = buf[pos];

  if (MOZ_UNLIKELY(nextCharOnNewLine)) {
    // The line/column switch is deferred until the first code unit after
    // the line break is read, rather than applied when the break itself is
    // seen. That way the break still counts, column-wise, as the end of the
    // line it terminates.
    nextCharOnNewLine = false;
    col = 1;
    line++;
    return unit;
  }

  if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(unit))) {
    // Columns count scalar values, as SpiderMonkey wants, not UTF-16 code
    // units: low surrogates are skipped so that a surrogate pair advances
    // the column once, on its high surrogate.
    //
    // Whether this is worth the cost is debatable. Even within the BMP an
    // extended grapheme cluster can span several scalar values, so the
    // scalar count is not a reliable human-perceived "column count"
    // anyway; this only gets the single-scalar non-BMP case right.
    col++;
  }
  return unit;
}
int32_t col;
bool nextCharOnNewLine;

View file

@ -6,10 +6,10 @@
#define nsHtml5TokenizerLoopPolicies_h
/**
* This policy does not report tokenizer transitions anywhere. To be used
* when _not_ viewing source.
* This policy does not report tokenizer transitions anywhere and does not
* track line and column numbers. To be used for innerHTML.
*/
struct nsHtml5SilentPolicy {
struct nsHtml5FastestPolicy {
static const bool reportErrors = false;
static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
bool aReconsume, int32_t aPos) {
@ -17,6 +17,77 @@ struct nsHtml5SilentPolicy {
}
static void completedNamedCharacterReference(
nsHtml5Highlighter* aHighlighter) {}
static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
int32_t pos) {
return buf[pos];
}
static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
aTokenizer->lastCR = true;
}
static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {}
};
/**
* This policy does not report tokenizer transitions anywhere but does track
* line and column numbers. To be used when _not_ viewing source and when not
* parsing innerHTML (or another script execution-preventing fragment).
*/
struct nsHtml5LineColPolicy {
  // Error reporting is switched off for this policy.
  static const bool reportErrors = false;

  // Passes the state through untouched; the highlighter and the remaining
  // arguments are not consulted.
  static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
                            bool aReconsume, int32_t aPos) {
    return aState;
  }

  // No-op for this policy.
  static void completedNamedCharacterReference(
      nsHtml5Highlighter* aHighlighter) {}

  // Despite the name (inherited from the validator), nothing is validated
  // here. The method fetches the UTF-16 code unit at buf[pos] and updates
  // the tokenizer's line/column bookkeeping before handing the code unit
  // back.
  static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
                            int32_t pos) {
    const char16_t unit = buf[pos];

    if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
      // The line/column switch is deferred until the first code unit after
      // the line break is read, rather than applied when the break itself
      // is seen. That way the break still counts, column-wise, as the end
      // of the line it terminates.
      aTokenizer->nextCharOnNewLine = false;
      aTokenizer->col = 1;
      aTokenizer->line++;
      return unit;
    }

    if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(unit))) {
      // Columns count scalar values, as SpiderMonkey wants, not UTF-16 code
      // units: low surrogates are skipped so that a surrogate pair advances
      // the column once, on its high surrogate.
      //
      // Whether this is worth the cost is debatable. Even within the BMP an
      // extended grapheme cluster can span several scalar values, so the
      // scalar count is not a reliable human-perceived "column count"
      // anyway; this only gets the single-scalar non-BMP case right.
      aTokenizer->col++;
    }
    return unit;
  }

  // Notes that a carriage return was seen: raises lastCR and marks the next
  // code unit as the first one on a new line.
  static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
    aTokenizer->lastCR = true;
    aTokenizer->nextCharOnNewLine = true;
  }

  // Notes that a line feed was seen: the next code unit read starts a new
  // line.
  static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
    aTokenizer->nextCharOnNewLine = true;
  }
};
/**
@ -33,6 +104,20 @@ struct nsHtml5ViewSourcePolicy {
nsHtml5Highlighter* aHighlighter) {
aHighlighter->CompletedNamedCharacterReference();
}
static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
int32_t pos) {
return buf[pos];
}
static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
aTokenizer->line++;
aTokenizer->lastCR = true;
}
static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
aTokenizer->line++;
}
};
#endif // nsHtml5TokenizerLoopPolicies_h

View file

@ -111,6 +111,15 @@ nsHtml5TreeBuilder(nsAHtml5TreeOpSink* aOpSink, nsHtml5TreeOpStage* aStage,
~nsHtml5TreeBuilder();
/**
 * Tells the caller whether line and column numbers are wanted. The answer
 * is false only when mBuilder is set and mPreventScriptExecution is true.
 *
 * Open question kept from the original author: testing mBuilder alone
 * might be enough. For createContextualFragment mBuilder is non-null while
 * mPreventScriptExecution is false, but it is unclear whether the line and
 * column attached to script elements mean anything in that case anyway.
 */
bool WantsLineAndColumn() {
  if (!mBuilder) {
    return true;
  }
  return !mPreventScriptExecution;
}
void StartPlainTextViewSource(const nsAutoString& aTitle);
void StartPlainText();