forked from mirrors/gecko-dev
Bug 1857221 - Avoid tracking the line and column number when parsing innerHTML. r=smaug
Differential Revision: https://phabricator.services.mozilla.com/D191270
This commit is contained in:
parent
5e39eb1c49
commit
fef29849ec
6 changed files with 358 additions and 262 deletions
|
|
@ -1401,9 +1401,19 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
public void start() throws SAXException {
|
||||
initializeWithoutStarting();
|
||||
tokenHandler.startTokenization(this);
|
||||
// CPPONLY: line = 0;
|
||||
// CPPONLY: col = 1;
|
||||
// CPPONLY: nextCharOnNewLine = true;
|
||||
// CPPONLY: if (mViewSource) {
|
||||
// CPPONLY: line = 1;
|
||||
// CPPONLY: col = -1;
|
||||
// CPPONLY: nextCharOnNewLine = false;
|
||||
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
|
||||
// CPPONLY: line = 0;
|
||||
// CPPONLY: col = 1;
|
||||
// CPPONLY: nextCharOnNewLine = true;
|
||||
// CPPONLY: } else {
|
||||
// CPPONLY: line = -1;
|
||||
// CPPONLY: col = -1;
|
||||
// CPPONLY: nextCharOnNewLine = false;
|
||||
// CPPONLY: }
|
||||
// [NOCPP[
|
||||
startErrorReporting();
|
||||
// ]NOCPP]
|
||||
|
|
@ -1469,6 +1479,8 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
// CPPONLY: mViewSource.SetBuffer(buffer);
|
||||
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
|
||||
// CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
|
||||
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
|
||||
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
|
||||
// CPPONLY: } else {
|
||||
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
|
||||
// CPPONLY: }
|
||||
|
|
@ -6320,24 +6332,24 @@ public class Tokenizer implements Locator, Locator2 {
|
|||
forceQuirks = false;
|
||||
}
|
||||
|
||||
@Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
|
||||
private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
|
||||
throws SAXException {
|
||||
silentCarriageReturn();
|
||||
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
|
||||
}
|
||||
|
||||
@Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
|
||||
private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
|
||||
throws SAXException {
|
||||
silentLineFeed();
|
||||
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
|
||||
}
|
||||
|
||||
@Inline private void appendStrBufLineFeed() {
|
||||
private void appendStrBufLineFeed() {
|
||||
silentLineFeed();
|
||||
appendStrBuf('\n');
|
||||
}
|
||||
|
||||
@Inline private void appendStrBufCarriageReturn() {
|
||||
private void appendStrBufCarriageReturn() {
|
||||
silentCarriageReturn();
|
||||
appendStrBuf('\n');
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -43,7 +43,6 @@
|
|||
#include "nsHtml5NamedCharacters.h"
|
||||
#include "nsHtml5NamedCharactersAccel.h"
|
||||
#include "nsHtml5String.h"
|
||||
#include "nsHtml5TokenizerLoopPolicies.h"
|
||||
#include "nsIContent.h"
|
||||
#include "nsTraceRefcnt.h"
|
||||
|
||||
|
|
@ -409,26 +408,15 @@ class nsHtml5Tokenizer {
|
|||
int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf,
|
||||
bool reconsume, int32_t returnState, int32_t endPos);
|
||||
void initDoctypeFields();
|
||||
// Handles a carriage return: calls silentCarriageReturn() and then hands
// '\n' (not '\r') to adjustDoubleHyphenAndAppendToStrBufAndErr() with
// false as the second argument.
inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() {
|
||||
silentCarriageReturn();
|
||||
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
|
||||
}
|
||||
|
||||
// Handles a line feed: calls silentLineFeed() and then hands '\n' to
// adjustDoubleHyphenAndAppendToStrBufAndErr() with false as the second
// argument.
inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() {
|
||||
silentLineFeed();
|
||||
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
|
||||
}
|
||||
|
||||
// Handles a line feed: calls silentLineFeed() and then passes '\n' to
// appendStrBuf().
inline void appendStrBufLineFeed() {
|
||||
silentLineFeed();
|
||||
appendStrBuf('\n');
|
||||
}
|
||||
|
||||
// Handles a carriage return: calls silentCarriageReturn() and then passes
// '\n' (not '\r') to appendStrBuf().
inline void appendStrBufCarriageReturn() {
|
||||
silentCarriageReturn();
|
||||
appendStrBuf('\n');
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
|
||||
template <class P>
|
||||
void adjustDoubleHyphenAndAppendToStrBufLineFeed();
|
||||
template <class P>
|
||||
void appendStrBufLineFeed();
|
||||
template <class P>
|
||||
void appendStrBufCarriageReturn();
|
||||
template <class P>
|
||||
void emitCarriageReturn(char16_t* buf, int32_t pos);
|
||||
void emitReplacementCharacter(char16_t* buf, int32_t pos);
|
||||
void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);
|
||||
|
|
|
|||
|
|
@ -2,49 +2,11 @@
|
|||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
friend struct nsHtml5ViewSourcePolicy;
|
||||
friend struct nsHtml5LineColPolicy;
|
||||
friend struct nsHtml5FastestPolicy;
|
||||
|
||||
private:
|
||||
// Sets nextCharOnNewLine, so that the next checkChar() call advances the
// line and resets the column, and sets lastCR.
inline void silentCarriageReturn() {
|
||||
nextCharOnNewLine = true;
|
||||
lastCR = true;
|
||||
}
|
||||
|
||||
// Sets nextCharOnNewLine, so that the next checkChar() call advances the
// line and resets the column.
inline void silentLineFeed() { nextCharOnNewLine = true; }
|
||||
|
||||
// Returns buf[pos] after updating the line/column bookkeeping: if
// nextCharOnNewLine is set, line is incremented, col is reset to 1 and the
// flag is cleared; otherwise col is incremented unless the code unit is a
// low surrogate.
inline char16_t checkChar(char16_t* buf, int32_t pos) {
|
||||
// The name of this method comes from the validator.
|
||||
// We aren't checking a char here. We read the next
|
||||
// UTF-16 code unit and, before returning it, adjust
|
||||
// the line and column numbers.
|
||||
char16_t c = buf[pos];
|
||||
if (MOZ_UNLIKELY(nextCharOnNewLine)) {
|
||||
// Changing the line and column here instead
|
||||
// of doing so eagerly when seeing the line break
|
||||
// causes the line break itself to be considered
|
||||
// column-wise at the end of a line.
|
||||
line++;
|
||||
col = 1;
|
||||
nextCharOnNewLine = false;
|
||||
} else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
|
||||
// SpiderMonkey wants to count scalar values
|
||||
// instead of UTF-16 code units. We omit low
|
||||
// surrogates from the count so that only the
|
||||
// high surrogate increments the count for
|
||||
// two-code-unit scalar values.
|
||||
//
|
||||
// It's somewhat questionable from the performance
|
||||
// perspective to make the human-perceivable column
|
||||
// count correct for non-BMP characters in the case
|
||||
// where there is a single scalar value per extended
|
||||
// grapheme cluster when even on the BMP there are
|
||||
// various cases where the scalar count doesn't make
|
||||
// much sense as a human-perceived "column count" due
|
||||
// to extended grapheme clusters consisting of more
|
||||
// than one scalar value.
|
||||
col++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
int32_t col;
|
||||
bool nextCharOnNewLine;
|
||||
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@
|
|||
#define nsHtml5TokenizerLoopPolicies_h
|
||||
|
||||
/**
|
||||
* This policy does not report tokenizer transitions anywhere. To be used
|
||||
* when _not_ viewing source.
|
||||
* This policy does not report tokenizer transitions anywhere and does not
|
||||
* track line and column numbers. To be used for innerHTML.
|
||||
*/
|
||||
struct nsHtml5SilentPolicy {
|
||||
struct nsHtml5FastestPolicy {
|
||||
static const bool reportErrors = false;
|
||||
static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
|
||||
bool aReconsume, int32_t aPos) {
|
||||
|
|
@ -17,6 +17,77 @@ struct nsHtml5SilentPolicy {
|
|||
}
|
||||
// No-op in this policy: aHighlighter is not used.
static void completedNamedCharacterReference(
|
||||
nsHtml5Highlighter* aHighlighter) {}
|
||||
|
||||
// Returns the code unit at buf[pos] without touching any line or column
// state; aTokenizer is unused.
static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
|
||||
int32_t pos) {
|
||||
return buf[pos];
|
||||
}
|
||||
|
||||
// Only sets lastCR on the tokenizer; no line or column state is updated.
static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
|
||||
aTokenizer->lastCR = true;
|
||||
}
|
||||
|
||||
// No-op in this policy: a line feed changes no tokenizer state here.
static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* This policy does not report tokenizer transitions anywhere but does track
|
||||
* line and column numbers. To be used when _not_ viewing source and when not
|
||||
* parsing innerHTML (or other script execution-preventing fragment).
|
||||
*/
|
||||
struct nsHtml5LineColPolicy {
|
||||
static const bool reportErrors = false;
|
||||
// Returns aState unchanged; aHighlighter, aReconsume and aPos are unused.
static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
|
||||
bool aReconsume, int32_t aPos) {
|
||||
return aState;
|
||||
}
|
||||
// No-op in this policy: aHighlighter is not used.
static void completedNamedCharacterReference(
|
||||
nsHtml5Highlighter* aHighlighter) {}
|
||||
|
||||
// Returns buf[pos] after updating the tokenizer's line/column bookkeeping:
// if nextCharOnNewLine is set, line is incremented, col is reset to 1 and
// the flag is cleared; otherwise col is incremented unless the code unit
// is a low surrogate.
static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
|
||||
int32_t pos) {
|
||||
// The name of this method comes from the validator.
|
||||
// We aren't checking a char here. We read the next
|
||||
// UTF-16 code unit and, before returning it, adjust
|
||||
// the line and column numbers.
|
||||
char16_t c = buf[pos];
|
||||
if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
|
||||
// Changing the line and column here instead
|
||||
// of doing so eagerly when seeing the line break
|
||||
// causes the line break itself to be considered
|
||||
// column-wise at the end of a line.
|
||||
aTokenizer->line++;
|
||||
aTokenizer->col = 1;
|
||||
aTokenizer->nextCharOnNewLine = false;
|
||||
} else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
|
||||
// SpiderMonkey wants to count scalar values
|
||||
// instead of UTF-16 code units. We omit low
|
||||
// surrogates from the count so that only the
|
||||
// high surrogate increments the count for
|
||||
// two-code-unit scalar values.
|
||||
//
|
||||
// It's somewhat questionable from the performance
|
||||
// perspective to make the human-perceivable column
|
||||
// count correct for non-BMP characters in the case
|
||||
// where there is a single scalar value per extended
|
||||
// grapheme cluster when even on the BMP there are
|
||||
// various cases where the scalar count doesn't make
|
||||
// much sense as a human-perceived "column count" due
|
||||
// to extended grapheme clusters consisting of more
|
||||
// than one scalar value.
|
||||
aTokenizer->col++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
// Sets nextCharOnNewLine, so that the next checkChar() call advances the
// line and resets the column, and sets lastCR.
static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
|
||||
aTokenizer->nextCharOnNewLine = true;
|
||||
aTokenizer->lastCR = true;
|
||||
}
|
||||
|
||||
// Sets nextCharOnNewLine, so that the next checkChar() call advances the
// line and resets the column.
static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
|
||||
aTokenizer->nextCharOnNewLine = true;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -33,6 +104,20 @@ struct nsHtml5ViewSourcePolicy {
|
|||
nsHtml5Highlighter* aHighlighter) {
|
||||
aHighlighter->CompletedNamedCharacterReference();
|
||||
}
|
||||
|
||||
// Returns the code unit at buf[pos]; no line or column state is updated
// here and aTokenizer is unused.
static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
|
||||
int32_t pos) {
|
||||
return buf[pos];
|
||||
}
|
||||
|
||||
// Increments the tokenizer's line counter immediately (rather than
// deferring via nextCharOnNewLine) and sets lastCR.
static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
|
||||
aTokenizer->line++;
|
||||
aTokenizer->lastCR = true;
|
||||
}
|
||||
|
||||
// Increments the tokenizer's line counter immediately (rather than
// deferring via nextCharOnNewLine).
static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
|
||||
aTokenizer->line++;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // nsHtml5TokenizerLoopPolicies_h
|
||||
|
|
|
|||
|
|
@ -111,6 +111,15 @@ nsHtml5TreeBuilder(nsAHtml5TreeOpSink* aOpSink, nsHtml5TreeOpStage* aStage,
|
|||
|
||||
~nsHtml5TreeBuilder();
|
||||
|
||||
// Reports whether line and column numbers are wanted: returns false only
// when mBuilder is set and mPreventScriptExecution is true, and true in
// every other case.
bool WantsLineAndColumn() {
|
||||
// Perhaps just checking mBuilder would be sufficient.
|
||||
// For createContextualFragment, we have non-null mBuilder and
|
||||
// false for mPreventScriptExecution. However, do the line and
|
||||
// column that get attached to script elements make any sense
|
||||
// anyway in that case?
|
||||
return !(mBuilder && mPreventScriptExecution);
|
||||
}
|
||||
|
||||
void StartPlainTextViewSource(const nsAutoString& aTitle);
|
||||
|
||||
void StartPlainText();
|
||||
|
|
|
|||
Loading…
Reference in a new issue