forked from mirrors/gecko-dev
		
	 5b8dabf526
			
		
	
	
		5b8dabf526
		
	
	
	
	
		
			
			And stop suppressing -Wimplicit-fallthrough warnings. We no longer need to suppress these clang warnings because the generated parser code now includes MOZ_FALLTHROUGH annotations. --HG-- extra : rebase_source : d766c9b17de878138df6d949ee720dfaf0ed370a
		
			
				
	
	
		
			7125 lines
		
	
	
	
		
			313 KiB
		
	
	
	
		
			Java
		
	
	
	
	
	
			
		
		
	
	
			7125 lines
		
	
	
	
		
			313 KiB
		
	
	
	
		
			Java
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2005-2007 Henri Sivonen
 | |
|  * Copyright (c) 2007-2015 Mozilla Foundation
 | |
|  * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
 | |
|  * Foundation, and Opera Software ASA.
 | |
|  *
 | |
|  * Permission is hereby granted, free of charge, to any person obtaining a
 | |
|  * copy of this software and associated documentation files (the "Software"),
 | |
|  * to deal in the Software without restriction, including without limitation
 | |
|  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 | |
|  * and/or sell copies of the Software, and to permit persons to whom the
 | |
|  * Software is furnished to do so, subject to the following conditions:
 | |
|  *
 | |
|  * The above copyright notice and this permission notice shall be included in
 | |
|  * all copies or substantial portions of the Software.
 | |
|  *
 | |
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 | |
|  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | |
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 | |
|  * DEALINGS IN THE SOFTWARE.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * The comments following this one that use the same comment syntax as this
 | |
|  * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007
 | |
|  * amended as of June 18 2008 and May 31 2010.
 | |
|  * That document came with this statement:
 | |
|  * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and
 | |
|  * Opera Software ASA. You are granted a license to use, reproduce and
 | |
|  * create derivative works of this document."
 | |
|  */
 | |
| 
 | |
| package nu.validator.htmlparser.impl;
 | |
| 
 | |
| import org.xml.sax.ErrorHandler;
 | |
| import org.xml.sax.Locator;
 | |
| import org.xml.sax.SAXException;
 | |
| import org.xml.sax.SAXParseException;
 | |
| 
 | |
| import nu.validator.htmlparser.annotation.Auto;
 | |
| import nu.validator.htmlparser.annotation.CharacterName;
 | |
| import nu.validator.htmlparser.annotation.Const;
 | |
| import nu.validator.htmlparser.annotation.Inline;
 | |
| import nu.validator.htmlparser.annotation.Local;
 | |
| import nu.validator.htmlparser.annotation.NoLength;
 | |
| import nu.validator.htmlparser.common.EncodingDeclarationHandler;
 | |
| import nu.validator.htmlparser.common.Interner;
 | |
| import nu.validator.htmlparser.common.TokenHandler;
 | |
| import nu.validator.htmlparser.common.XmlViolationPolicy;
 | |
| 
 | |
| /**
 | |
|  * An implementation of
 | |
|  * https://html.spec.whatwg.org/multipage/syntax.html#tokenization
 | |
|  *
 | |
|  * This class implements the <code>Locator</code> interface. This is not an
 | |
|  * incidental implementation detail: Users of this class are encouraged to make
 | |
|  * use of the <code>Locator</code> nature.
 | |
|  *
 | |
|  * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
 | |
|  * can be configured to treat these conditions as fatal or to coerce the infoset
 | |
|  * to something that XML 1.0 allows.
 | |
|  *
 | |
|  * @version $Id$
 | |
|  * @author hsivonen
 | |
|  */
 | |
| public class Tokenizer implements Locator {
 | |
| 
 | |
|     private static final int DATA_AND_RCDATA_MASK = ~1;
 | |
| 
 | |
|     public static final int DATA = 0;
 | |
| 
 | |
|     public static final int RCDATA = 1;
 | |
| 
 | |
|     public static final int SCRIPT_DATA = 2;
 | |
| 
 | |
|     public static final int RAWTEXT = 3;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_ESCAPED = 4;
 | |
| 
 | |
|     public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
 | |
| 
 | |
|     public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
 | |
| 
 | |
|     public static final int ATTRIBUTE_VALUE_UNQUOTED = 7;
 | |
| 
 | |
|     public static final int PLAINTEXT = 8;
 | |
| 
 | |
|     public static final int TAG_OPEN = 9;
 | |
| 
 | |
|     public static final int CLOSE_TAG_OPEN = 10;
 | |
| 
 | |
|     public static final int TAG_NAME = 11;
 | |
| 
 | |
|     public static final int BEFORE_ATTRIBUTE_NAME = 12;
 | |
| 
 | |
|     public static final int ATTRIBUTE_NAME = 13;
 | |
| 
 | |
|     public static final int AFTER_ATTRIBUTE_NAME = 14;
 | |
| 
 | |
|     public static final int BEFORE_ATTRIBUTE_VALUE = 15;
 | |
| 
 | |
|     public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
 | |
| 
 | |
|     public static final int BOGUS_COMMENT = 17;
 | |
| 
 | |
|     public static final int MARKUP_DECLARATION_OPEN = 18;
 | |
| 
 | |
|     public static final int DOCTYPE = 19;
 | |
| 
 | |
|     public static final int BEFORE_DOCTYPE_NAME = 20;
 | |
| 
 | |
|     public static final int DOCTYPE_NAME = 21;
 | |
| 
 | |
|     public static final int AFTER_DOCTYPE_NAME = 22;
 | |
| 
 | |
|     public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
 | |
| 
 | |
|     public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
 | |
| 
 | |
|     public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
 | |
| 
 | |
|     public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
 | |
| 
 | |
|     public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
 | |
| 
 | |
|     public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
 | |
| 
 | |
|     public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
 | |
| 
 | |
|     public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
 | |
| 
 | |
|     public static final int BOGUS_DOCTYPE = 31;
 | |
| 
 | |
|     public static final int COMMENT_START = 32;
 | |
| 
 | |
|     public static final int COMMENT_START_DASH = 33;
 | |
| 
 | |
|     public static final int COMMENT = 34;
 | |
| 
 | |
|     public static final int COMMENT_END_DASH = 35;
 | |
| 
 | |
|     public static final int COMMENT_END = 36;
 | |
| 
 | |
|     public static final int COMMENT_END_BANG = 37;
 | |
| 
 | |
|     public static final int NON_DATA_END_TAG_NAME = 38;
 | |
| 
 | |
|     public static final int MARKUP_DECLARATION_HYPHEN = 39;
 | |
| 
 | |
|     public static final int MARKUP_DECLARATION_OCTYPE = 40;
 | |
| 
 | |
|     public static final int DOCTYPE_UBLIC = 41;
 | |
| 
 | |
|     public static final int DOCTYPE_YSTEM = 42;
 | |
| 
 | |
|     public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
 | |
| 
 | |
|     public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
 | |
| 
 | |
|     public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
 | |
| 
 | |
|     public static final int CONSUME_CHARACTER_REFERENCE = 46;
 | |
| 
 | |
|     public static final int CONSUME_NCR = 47;
 | |
| 
 | |
|     public static final int CHARACTER_REFERENCE_TAIL = 48;
 | |
| 
 | |
|     public static final int HEX_NCR_LOOP = 49;
 | |
| 
 | |
|     public static final int DECIMAL_NRC_LOOP = 50;
 | |
| 
 | |
|     public static final int HANDLE_NCR_VALUE = 51;
 | |
| 
 | |
|     public static final int HANDLE_NCR_VALUE_RECONSUME = 52;
 | |
| 
 | |
|     public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53;
 | |
| 
 | |
|     public static final int SELF_CLOSING_START_TAG = 54;
 | |
| 
 | |
|     public static final int CDATA_START = 55;
 | |
| 
 | |
|     public static final int CDATA_SECTION = 56;
 | |
| 
 | |
|     public static final int CDATA_RSQB = 57;
 | |
| 
 | |
|     public static final int CDATA_RSQB_RSQB = 58;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_ESCAPE_START = 60;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_ESCAPED_DASH = 62;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
 | |
| 
 | |
|     public static final int BOGUS_COMMENT_HYPHEN = 64;
 | |
| 
 | |
|     public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 68;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
 | |
| 
 | |
|     public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
 | |
| 
 | |
|     public static final int PROCESSING_INSTRUCTION = 73;
 | |
| 
 | |
|     public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
 | |
| 
 | |
|     /**
 | |
|      * Magic value for UTF-16 operations.
 | |
|      */
 | |
|     private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
 | |
| 
 | |
|     /**
 | |
|      * UTF-16 code unit array containing less than and greater than for emitting
 | |
|      * those characters on certain parse errors.
 | |
|      */
 | |
|     private static final @NoLength char[] LT_GT = { '<', '>' };
 | |
| 
 | |
|     /**
 | |
|      * UTF-16 code unit array containing less than and solidus for emitting
 | |
|      * those characters on certain parse errors.
 | |
|      */
 | |
|     private static final @NoLength char[] LT_SOLIDUS = { '<', '/' };
 | |
| 
 | |
|     /**
 | |
|      * UTF-16 code unit array containing ]] for emitting those characters on
 | |
|      * state transitions.
 | |
|      */
 | |
|     private static final @NoLength char[] RSQB_RSQB = { ']', ']' };
 | |
| 
 | |
|     /**
 | |
|      * Array version of U+FFFD.
 | |
|      */
 | |
|     private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * Array version of space.
 | |
|      */
 | |
|     private static final @NoLength char[] SPACE = { ' ' };
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     /**
 | |
|      * Array version of line feed.
 | |
|      */
 | |
|     private static final @NoLength char[] LF = { '\n' };
 | |
| 
 | |
|     /**
 | |
|      * "CDATA[" as <code>char[]</code>
 | |
|      */
 | |
|     private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T',
 | |
|             'A', '[' };
 | |
| 
 | |
|     /**
 | |
|      * "octype" as <code>char[]</code>
 | |
|      */
 | |
|     private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p',
 | |
|             'e' };
 | |
| 
 | |
|     /**
 | |
|      * "ublic" as <code>char[]</code>
 | |
|      */
 | |
|     private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' };
 | |
| 
 | |
|     /**
 | |
|      * "ystem" as <code>char[]</code>
 | |
|      */
 | |
|     private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' };
 | |
| 
 | |
|     private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' };
 | |
| 
 | |
|     private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' };
 | |
| 
 | |
|     private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' };
 | |
| 
 | |
|     private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't',
 | |
|             'e', 'x', 't' };
 | |
| 
 | |
|     private static final char[] XMP_ARR = { 'x', 'm', 'p' };
 | |
| 
 | |
|     private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r',
 | |
|             'e', 'a' };
 | |
| 
 | |
|     private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' };
 | |
| 
 | |
|     private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e',
 | |
|             'd' };
 | |
| 
 | |
|     private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i',
 | |
|             'p', 't' };
 | |
| 
 | |
|     private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm',
 | |
|             'e', 's' };
 | |
| 
 | |
|     /**
 | |
|      * The token handler.
 | |
|      */
 | |
|     protected final TokenHandler tokenHandler;
 | |
| 
 | |
|     protected EncodingDeclarationHandler encodingDeclarationHandler;
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * The error handler.
 | |
|      */
 | |
|     protected ErrorHandler errorHandler;
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     /**
 | |
|      * Whether the previous char read was CR.
 | |
|      */
 | |
|     protected boolean lastCR;
 | |
| 
 | |
|     protected int stateSave;
 | |
| 
 | |
|     private int returnStateSave;
 | |
| 
 | |
|     protected int index;
 | |
| 
 | |
|     private boolean forceQuirks;
 | |
| 
 | |
|     private char additional;
 | |
| 
 | |
|     private int entCol;
 | |
| 
 | |
|     private int firstCharKey;
 | |
| 
 | |
|     private int lo;
 | |
| 
 | |
|     private int hi;
 | |
| 
 | |
|     private int candidate;
 | |
| 
 | |
|     private int charRefBufMark;
 | |
| 
 | |
|     protected int value;
 | |
| 
 | |
|     private boolean seenDigits;
 | |
| 
 | |
|     protected int cstart;
 | |
| 
 | |
|     /**
 | |
|      * The SAX public id for the resource being tokenized. (Only passed to back
 | |
|      * as part of locator data.)
 | |
|      */
 | |
|     private String publicId;
 | |
| 
 | |
|     /**
 | |
|      * The SAX system id for the resource being tokenized. (Only passed to back
 | |
|      * as part of locator data.)
 | |
|      */
 | |
|     private String systemId;
 | |
| 
 | |
|     /**
 | |
|      * Buffer for bufferable things other than those that fit the description
 | |
|      * of <code>charRefBuf</code>.
 | |
|      */
 | |
|     private @Auto char[] strBuf;
 | |
| 
 | |
|     /**
 | |
|      * Number of significant <code>char</code>s in <code>strBuf</code>.
 | |
|      */
 | |
|     private int strBufLen;
 | |
| 
 | |
|     /**
 | |
|      * Buffer for characters that might form a character reference but may
 | |
|      * end up not forming one.
 | |
|      */
 | |
|     private final @Auto char[] charRefBuf;
 | |
| 
 | |
|     /**
 | |
|      * Number of significant <code>char</code>s in <code>charRefBuf</code>.
 | |
|      */
 | |
|     private int charRefBufLen;
 | |
| 
 | |
|     /**
 | |
|      * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
 | |
|      */
 | |
|     private final @Auto char[] bmpChar;
 | |
| 
 | |
|     /**
 | |
|      * Buffer for expanding astral NCRs.
 | |
|      */
 | |
|     private final @Auto char[] astralChar;
 | |
| 
 | |
|     /**
 | |
|      * The element whose end tag closes the current CDATA or RCDATA element.
 | |
|      */
 | |
|     protected ElementName endTagExpectation = null;
 | |
| 
 | |
|     private char[] endTagExpectationAsArray; // not @Auto!
 | |
| 
 | |
|     /**
 | |
|      * <code>true</code> if tokenizing an end tag
 | |
|      */
 | |
|     protected boolean endTag;
 | |
| 
 | |
|     /**
 | |
|      * <code>true</code> iff the current element/attribute name contains
 | |
|      * a hyphen.
 | |
|      */
 | |
|     private boolean containsHyphen;
 | |
| 
 | |
|     /**
 | |
|      * The current tag token name. One of
 | |
|      * 1) null,
 | |
|      * 2) non-owning reference to nonInternedTagName
 | |
|      * 3) non-owning reference to a pre-interned ElementName
 | |
|      */
 | |
|     private ElementName tagName = null;
 | |
| 
 | |
|     /**
 | |
|      * The recycled ElementName instance for the non-pre-interned cases.
 | |
|      */
 | |
|     private ElementName nonInternedTagName = null;
 | |
| 
 | |
|     /**
 | |
|      * The current attribute name.
 | |
|      */
 | |
|     protected AttributeName attributeName = null;
 | |
| 
 | |
|     // CPPONLY: private AttributeName nonInternedAttributeName = null;
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * Whether comment tokens are emitted.
 | |
|      */
 | |
|     private boolean wantsComments = false;
 | |
| 
 | |
|     /**
 | |
|      * <code>true</code> when HTML4-specific additional errors are requested.
 | |
|      */
 | |
|     protected boolean html4;
 | |
| 
 | |
|     /**
 | |
|      * Whether the stream is past the first 1024 bytes.
 | |
|      */
 | |
|     private boolean metaBoundaryPassed;
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     /**
 | |
|      * The name of the current doctype token.
 | |
|      */
 | |
|     private @Local String doctypeName;
 | |
| 
 | |
|     /**
 | |
|      * The public id of the current doctype token.
 | |
|      */
 | |
|     private String publicIdentifier;
 | |
| 
 | |
|     /**
 | |
|      * The system id of the current doctype token.
 | |
|      */
 | |
|     private String systemIdentifier;
 | |
| 
 | |
|     /**
 | |
|      * The attribute holder.
 | |
|      */
 | |
|     private HtmlAttributes attributes;
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * The policy for vertical tab and form feed.
 | |
|      */
 | |
|     private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET;
 | |
| 
 | |
|     /**
 | |
|      * The policy for comments.
 | |
|      */
 | |
|     private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET;
 | |
| 
 | |
|     private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
 | |
| 
 | |
|     private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
 | |
| 
 | |
|     private boolean html4ModeCompatibleWithXhtml1Schemata;
 | |
| 
 | |
|     private int mappingLangToXmlLang;
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     private final boolean newAttributesEachTime;
 | |
| 
 | |
|     private boolean shouldSuspend;
 | |
| 
 | |
|     protected boolean confident;
 | |
| 
 | |
|     private int line;
 | |
| 
 | |
|     /*
 | |
|      * The line number of the current attribute. First set to the line of the
 | |
|      * attribute name and if there is a value, set to the line the value
 | |
|      * started on.
 | |
|      */
 | |
|     // CPPONLY: private int attributeLine;
 | |
| 
 | |
|     private Interner interner;
 | |
| 
 | |
|     // CPPONLY: private boolean viewingXmlSource;
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     protected LocatorImpl ampersandLocation;
 | |
| 
 | |
|     public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) {
 | |
|         this.tokenHandler = tokenHandler;
 | |
|         this.encodingDeclarationHandler = null;
 | |
|         this.newAttributesEachTime = newAttributesEachTime;
 | |
|         // ∳ is the longest valid char ref and
 | |
|         // the semicolon never gets appended to the buffer.
 | |
|         this.charRefBuf = new char[32];
 | |
|         this.bmpChar = new char[1];
 | |
|         this.astralChar = new char[2];
 | |
|         this.containsHyphen = false;
 | |
|         this.tagName = null;
 | |
|         this.nonInternedTagName = new ElementName();
 | |
|         this.attributeName = null;
 | |
|         // CPPONLY: this.nonInternedAttributeName = new AttributeName();
 | |
|         this.doctypeName = null;
 | |
|         this.publicIdentifier = null;
 | |
|         this.systemIdentifier = null;
 | |
|         this.attributes = null;
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     /**
 | |
|      * The constructor.
 | |
|      *
 | |
|      * @param tokenHandler
 | |
|      *            the handler for receiving tokens
 | |
|      */
 | |
|     public Tokenizer(TokenHandler tokenHandler
 | |
|     // CPPONLY: , boolean viewingXmlSource
 | |
|     ) {
 | |
|         this.tokenHandler = tokenHandler;
 | |
|         this.encodingDeclarationHandler = null;
 | |
|         // [NOCPP[
 | |
|         this.newAttributesEachTime = false;
 | |
|         // ]NOCPP]
 | |
|         // ∳ is the longest valid char ref and
 | |
|         // the semicolon never gets appended to the buffer.
 | |
|         this.charRefBuf = new char[32];
 | |
|         this.bmpChar = new char[1];
 | |
|         this.astralChar = new char[2];
 | |
|         this.containsHyphen = false;
 | |
|         this.tagName = null;
 | |
|         this.nonInternedTagName = new ElementName();
 | |
|         this.attributeName = null;
 | |
|         // CPPONLY: this.nonInternedAttributeName = new AttributeName();
 | |
|         this.doctypeName = null;
 | |
|         this.publicIdentifier = null;
 | |
|         this.systemIdentifier = null;
 | |
|         // [NOCPP[
 | |
|         this.attributes = null;
 | |
|         // ]NOCPP]
 | |
|         // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null;
 | |
|         // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder();
 | |
|         // CPPONLY: this.viewingXmlSource = viewingXmlSource;
 | |
|     }
 | |
| 
 | |
|     public void setInterner(Interner interner) {
 | |
|         this.interner = interner;
 | |
|     }
 | |
| 
 | |
|     public void initLocation(String newPublicId, String newSystemId) {
 | |
|         this.systemId = newSystemId;
 | |
|         this.publicId = newPublicId;
 | |
| 
 | |
|     }
 | |
| 
 | |
|     // CPPONLY: boolean isViewingXmlSource() {
 | |
|     // CPPONLY: return viewingXmlSource;
 | |
|     // CPPONLY: }
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * Returns the mappingLangToXmlLang.
 | |
|      *
 | |
|      * @return the mappingLangToXmlLang
 | |
|      */
 | |
|     public boolean isMappingLangToXmlLang() {
 | |
|         return mappingLangToXmlLang == AttributeName.HTML_LANG;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the mappingLangToXmlLang.
 | |
|      *
 | |
|      * @param mappingLangToXmlLang
 | |
|      *            the mappingLangToXmlLang to set
 | |
|      */
 | |
|     public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
 | |
|         this.mappingLangToXmlLang = mappingLangToXmlLang ? AttributeName.HTML_LANG
 | |
|                 : AttributeName.HTML;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the error handler.
 | |
|      *
 | |
|      * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
 | |
|      */
 | |
|     public void setErrorHandler(ErrorHandler eh) {
 | |
|         this.errorHandler = eh;
 | |
|     }
 | |
| 
 | |
|     public ErrorHandler getErrorHandler() {
 | |
|         return this.errorHandler;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the commentPolicy.
 | |
|      *
 | |
|      * @param commentPolicy
 | |
|      *            the commentPolicy to set
 | |
|      */
 | |
|     public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
 | |
|         this.commentPolicy = commentPolicy;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the contentNonXmlCharPolicy.
 | |
|      *
 | |
|      * @param contentNonXmlCharPolicy
 | |
|      *            the contentNonXmlCharPolicy to set
 | |
|      */
 | |
|     public void setContentNonXmlCharPolicy(
 | |
|             XmlViolationPolicy contentNonXmlCharPolicy) {
 | |
|         if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) {
 | |
|             throw new IllegalArgumentException(
 | |
|                     "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the contentSpacePolicy.
 | |
|      *
 | |
|      * @param contentSpacePolicy
 | |
|      *            the contentSpacePolicy to set
 | |
|      */
 | |
|     public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
 | |
|         this.contentSpacePolicy = contentSpacePolicy;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the xmlnsPolicy.
 | |
|      *
 | |
|      * @param xmlnsPolicy
 | |
|      *            the xmlnsPolicy to set
 | |
|      */
 | |
|     public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
 | |
|         if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
 | |
|             throw new IllegalArgumentException("Can't use FATAL here.");
 | |
|         }
 | |
|         this.xmlnsPolicy = xmlnsPolicy;
 | |
|     }
 | |
| 
 | |
|     public void setNamePolicy(XmlViolationPolicy namePolicy) {
 | |
|         this.namePolicy = namePolicy;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the html4ModeCompatibleWithXhtml1Schemata.
 | |
|      *
 | |
|      * @param html4ModeCompatibleWithXhtml1Schemata
 | |
|      *            the html4ModeCompatibleWithXhtml1Schemata to set
 | |
|      */
 | |
|     public void setHtml4ModeCompatibleWithXhtml1Schemata(
 | |
|             boolean html4ModeCompatibleWithXhtml1Schemata) {
 | |
|         this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     // For the token handler to call
 | |
|     /**
 | |
|      * Sets the tokenizer state and the associated element name. This should
 | |
|      * only ever used to put the tokenizer into one of the states that have
 | |
|      * a special end tag expectation.
 | |
|      *
 | |
|      * @param specialTokenizerState
 | |
|      *            the tokenizer state to set
 | |
|      * @param endTagExpectation
 | |
|      *            the expected end tag for transitioning back to normal
 | |
|      */
 | |
|     public void setStateAndEndTagExpectation(int specialTokenizerState,
 | |
|             @Local String endTagExpectation) {
 | |
|         this.stateSave = specialTokenizerState;
 | |
|         if (specialTokenizerState == Tokenizer.DATA) {
 | |
|             return;
 | |
|         }
 | |
|         @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation);
 | |
|         this.endTagExpectation = ElementName.elementNameByBuffer(asArray,
 | |
|                 asArray.length, interner);
 | |
|         assert this.endTagExpectation != null;
 | |
|         endTagExpectationToArray();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the tokenizer state and the associated element name. This should
 | |
|      * only ever used to put the tokenizer into one of the states that have
 | |
|      * a special end tag expectation.
 | |
|      *
 | |
|      * @param specialTokenizerState
 | |
|      *            the tokenizer state to set
 | |
|      * @param endTagExpectation
 | |
|      *            the expected end tag for transitioning back to normal
 | |
|      */
 | |
|     public void setStateAndEndTagExpectation(int specialTokenizerState,
 | |
|             ElementName endTagExpectation) {
 | |
|         this.stateSave = specialTokenizerState;
 | |
|         this.endTagExpectation = endTagExpectation;
 | |
|         endTagExpectationToArray();
 | |
|     }
 | |
| 
 | |
|     private void endTagExpectationToArray() {
 | |
|         switch (endTagExpectation.getGroup()) {
 | |
|             case TreeBuilder.TITLE:
 | |
|                 endTagExpectationAsArray = TITLE_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.SCRIPT:
 | |
|                 endTagExpectationAsArray = SCRIPT_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.STYLE:
 | |
|                 endTagExpectationAsArray = STYLE_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.PLAINTEXT:
 | |
|                 endTagExpectationAsArray = PLAINTEXT_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.XMP:
 | |
|                 endTagExpectationAsArray = XMP_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.TEXTAREA:
 | |
|                 endTagExpectationAsArray = TEXTAREA_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.IFRAME:
 | |
|                 endTagExpectationAsArray = IFRAME_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.NOEMBED:
 | |
|                 endTagExpectationAsArray = NOEMBED_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.NOSCRIPT:
 | |
|                 endTagExpectationAsArray = NOSCRIPT_ARR;
 | |
|                 return;
 | |
|             case TreeBuilder.NOFRAMES:
 | |
|                 endTagExpectationAsArray = NOFRAMES_ARR;
 | |
|                 return;
 | |
|             default:
 | |
|                 assert false: "Bad end tag expectation.";
 | |
|                 return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * For C++ use only.
 | |
|      */
 | |
|     public void setLineNumber(int line) {
 | |
|         // CPPONLY: this.attributeLine = line; // XXX is this needed?
 | |
|         this.line = line;
 | |
|     }
 | |
| 
 | |
|     // start Locator impl
 | |
| 
 | |
|     /**
 | |
|      * @see org.xml.sax.Locator#getLineNumber()
 | |
|      */
 | |
|     @Inline public int getLineNumber() {
 | |
|         return line;
 | |
|     }
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * @see org.xml.sax.Locator#getColumnNumber()
 | |
|      */
 | |
|     @Inline public int getColumnNumber() {
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * @see org.xml.sax.Locator#getPublicId()
 | |
|      */
 | |
|     public String getPublicId() {
 | |
|         return publicId;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * @see org.xml.sax.Locator#getSystemId()
 | |
|      */
 | |
|     public String getSystemId() {
 | |
|         return systemId;
 | |
|     }
 | |
| 
 | |
|     // end Locator impl
 | |
| 
 | |
|     // end public API
 | |
| 
 | |
|     public void notifyAboutMetaBoundary() {
 | |
|         metaBoundaryPassed = true;
 | |
|     }
 | |
| 
 | |
|     void turnOnAdditionalHtml4Errors() {
 | |
|         html4 = true;
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     HtmlAttributes emptyAttributes() {
 | |
|         // [NOCPP[
 | |
|         if (newAttributesEachTime) {
 | |
|             return new HtmlAttributes(mappingLangToXmlLang);
 | |
|         } else {
 | |
|             // ]NOCPP]
 | |
|             return HtmlAttributes.EMPTY_ATTRIBUTES;
 | |
|             // [NOCPP[
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|     }
 | |
| 
 | |
|     @Inline private void appendCharRefBuf(char c) {
 | |
|         // CPPONLY: assert charRefBufLen < charRefBuf.length:
 | |
|         // CPPONLY:     "RELEASE: Attempted to overrun charRefBuf!";
 | |
|         charRefBuf[charRefBufLen++] = c;
 | |
|     }
 | |
| 
 | |
|     private void emitOrAppendCharRefBuf(int returnState) throws SAXException {
 | |
|         if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|             appendCharRefBufToStrBuf();
 | |
|         } else {
 | |
|             if (charRefBufLen > 0) {
 | |
|                 tokenHandler.characters(charRefBuf, 0, charRefBufLen);
 | |
|                 charRefBufLen = 0;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     @Inline private void clearStrBufAfterUse() {
 | |
|         strBufLen = 0;
 | |
|     }
 | |
| 
 | |
|     @Inline private void clearStrBufBeforeUse() {
 | |
|         assert strBufLen == 0: "strBufLen not reset after previous use!";
 | |
|         strBufLen = 0; // no-op in the absence of bugs
 | |
|     }
 | |
| 
 | |
|     @Inline private void clearStrBufAfterOneHyphen() {
 | |
|         assert strBufLen == 1: "strBufLen length not one!";
 | |
|         assert strBuf[0] == '-': "strBuf does not start with a hyphen!";
 | |
|         strBufLen = 0;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Appends to the buffer.
 | |
|      *
 | |
|      * @param c
 | |
|      *            the UTF-16 code unit to append
 | |
|      */
 | |
|     @Inline private void appendStrBuf(char c) {
 | |
|         // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
 | |
|         // CPPONLY: if (strBufLen == strBuf.length) {
 | |
|         // CPPONLY:     if (!EnsureBufferSpace(1)) {
 | |
|         // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
 | |
|         // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
 | |
|         // CPPONLY: }
 | |
|         strBuf[strBufLen++] = c;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * The buffer as a String. Currently only used for error reporting.
 | |
|      *
 | |
|      * <p>
 | |
|      * C++ memory note: The return value must be released.
 | |
|      *
 | |
|      * @return the buffer as a string
 | |
|      */
 | |
|     protected String strBufToString() {
 | |
|         String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
 | |
|             // CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS
 | |
|         );
 | |
|         clearStrBufAfterUse();
 | |
|         return str;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Returns the buffer as a local name. The return value is released in
 | |
|      * emitDoctypeToken().
 | |
|      *
 | |
|      * @return the buffer as local name
 | |
|      */
 | |
|     private void strBufToDoctypeName() {
 | |
|         doctypeName = Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner);
 | |
|         clearStrBufAfterUse();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Emits the buffer as character tokens.
 | |
|      *
 | |
|      * @throws SAXException
 | |
|      *             if the token handler threw
 | |
|      */
 | |
|     private void emitStrBuf() throws SAXException {
 | |
|         if (strBufLen > 0) {
 | |
|             tokenHandler.characters(strBuf, 0, strBufLen);
 | |
|             clearStrBufAfterUse();
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     @Inline private void appendSecondHyphenToBogusComment() throws SAXException {
 | |
|         // [NOCPP[
 | |
|         switch (commentPolicy) {
 | |
|             case ALTER_INFOSET:
 | |
|                 appendStrBuf(' ');
 | |
|                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|             case ALLOW:
 | |
|                 warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
 | |
|                 // ]NOCPP]
 | |
|                 appendStrBuf('-');
 | |
|                 // [NOCPP[
 | |
|                 break;
 | |
|             case FATAL:
 | |
|                 fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
 | |
|                 break;
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|     }
 | |
| 
 | |
|     // [NOCPP[
 | |
|     private void maybeAppendSpaceToBogusComment() throws SAXException {
 | |
|         switch (commentPolicy) {
 | |
|             case ALTER_INFOSET:
 | |
|                 appendStrBuf(' ');
 | |
|                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|             case ALLOW:
 | |
|                 warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
 | |
|                 break;
 | |
|             case FATAL:
 | |
|                 fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
 | |
|             throws SAXException {
 | |
|         errConsecutiveHyphens();
 | |
|         // [NOCPP[
 | |
|         switch (commentPolicy) {
 | |
|             case ALTER_INFOSET:
 | |
|                 strBufLen--;
 | |
|                 // WARNING!!! This expands the worst case of the buffer length
 | |
|                 // given the length of input!
 | |
|                 appendStrBuf(' ');
 | |
|                 appendStrBuf('-');
 | |
|                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|             case ALLOW:
 | |
|                 warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
 | |
|                 // ]NOCPP]
 | |
|                 appendStrBuf(c);
 | |
|                 // [NOCPP[
 | |
|                 break;
 | |
|             case FATAL:
 | |
|                 fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
 | |
|                 break;
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|     }
 | |
| 
 | |
|     private void appendStrBuf(@NoLength char[] buffer, int offset, int length) {
 | |
|         int newLen = strBufLen + length;
 | |
|         // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient.";
 | |
|         // CPPONLY: if (strBuf.length < newLen) {
 | |
|         // CPPONLY:     if (!EnsureBufferSpace(length)) {
 | |
|         // CPPONLY:         assert false: "RELEASE: Unable to recover from buffer reallocation failure";
 | |
|         // CPPONLY:     } // TODO: Add telemetry when outer if fires but inner does not
 | |
|         // CPPONLY: }
 | |
|         System.arraycopy(buffer, offset, strBuf, strBufLen, length);
 | |
|         strBufLen = newLen;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Append the contents of the char reference buffer to the main one.
 | |
|      */
 | |
|     @Inline private void appendCharRefBufToStrBuf() {
 | |
|         appendStrBuf(charRefBuf, 0, charRefBufLen);
 | |
|         charRefBufLen = 0;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Emits the current comment token.
 | |
|      *
 | |
|      * @param pos
 | |
|      *            TODO
 | |
|      *
 | |
|      * @throws SAXException
 | |
|      */
 | |
|     private void emitComment(int provisionalHyphens, int pos)
 | |
|             throws SAXException {
 | |
|         // [NOCPP[
 | |
|         if (wantsComments) {
 | |
|             // ]NOCPP]
 | |
|             tokenHandler.comment(strBuf, 0, strBufLen
 | |
|                     - provisionalHyphens);
 | |
|             // [NOCPP[
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|         clearStrBufAfterUse();
 | |
|         cstart = pos + 1;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Flushes coalesced character tokens.
 | |
|      *
 | |
|      * @param buf
 | |
|      *            TODO
 | |
|      * @param pos
 | |
|      *            TODO
 | |
|      *
 | |
|      * @throws SAXException
 | |
|      */
 | |
|     protected void flushChars(@NoLength char[] buf, int pos)
 | |
|             throws SAXException {
 | |
|         if (pos > cstart) {
 | |
|             tokenHandler.characters(buf, cstart, pos - cstart);
 | |
|         }
 | |
|         cstart = Integer.MAX_VALUE;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Reports an condition that would make the infoset incompatible with XML
 | |
|      * 1.0 as fatal.
 | |
|      *
 | |
|      * @param message
 | |
|      *            the message
 | |
|      * @throws SAXException
 | |
|      * @throws SAXParseException
 | |
|      */
 | |
|     public void fatal(String message) throws SAXException {
 | |
|         SAXParseException spe = new SAXParseException(message, this);
 | |
|         if (errorHandler != null) {
 | |
|             errorHandler.fatalError(spe);
 | |
|         }
 | |
|         throw spe;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Reports a Parse Error.
 | |
|      *
 | |
|      * @param message
 | |
|      *            the message
 | |
|      * @throws SAXException
 | |
|      */
 | |
|     public void err(String message) throws SAXException {
 | |
|         if (errorHandler == null) {
 | |
|             return;
 | |
|         }
 | |
|         SAXParseException spe = new SAXParseException(message, this);
 | |
|         errorHandler.error(spe);
 | |
|     }
 | |
| 
 | |
|     public void errTreeBuilder(String message) throws SAXException {
 | |
|         ErrorHandler eh = null;
 | |
|         if (tokenHandler instanceof TreeBuilder<?>) {
 | |
|             TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler;
 | |
|             eh = treeBuilder.getErrorHandler();
 | |
|         }
 | |
|         if (eh == null) {
 | |
|             eh = errorHandler;
 | |
|         }
 | |
|         if (eh == null) {
 | |
|             return;
 | |
|         }
 | |
|         SAXParseException spe = new SAXParseException(message, this);
 | |
|         eh.error(spe);
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Reports a warning
 | |
|      *
 | |
|      * @param message
 | |
|      *            the message
 | |
|      * @throws SAXException
 | |
|      */
 | |
|     public void warn(String message) throws SAXException {
 | |
|         if (errorHandler == null) {
 | |
|             return;
 | |
|         }
 | |
|         SAXParseException spe = new SAXParseException(message, this);
 | |
|         errorHandler.warning(spe);
 | |
|     }
 | |
| 
 | |
|     private void strBufToElementNameString() {
 | |
|         if (containsHyphen) {
 | |
|             // We've got a custom element or annotation-xml.
 | |
|             @Local String annotationName = ElementName.ANNOTATION_XML.getName();
 | |
|             if (Portability.localEqualsBuffer(annotationName, strBuf, strBufLen)) {
 | |
|                 tagName = ElementName.ANNOTATION_XML;
 | |
|             } else {
 | |
|                 nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, strBufLen,
 | |
|                         interner)
 | |
|                         // CPPONLY: , true
 | |
|                         );
 | |
|                 tagName = nonInternedTagName;
 | |
|             }
 | |
|         } else {
 | |
|             tagName = ElementName.elementNameByBuffer(strBuf, strBufLen, interner);
 | |
|             if (tagName == null) {
 | |
|                 nonInternedTagName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, strBufLen,
 | |
|                     interner)
 | |
|                         // CPPONLY: , false
 | |
|                         );
 | |
|                 tagName = nonInternedTagName;
 | |
|             }
 | |
|         }
 | |
|         containsHyphen = false;
 | |
|         clearStrBufAfterUse();
 | |
|     }
 | |
| 
 | |
|     private int emitCurrentTagToken(boolean selfClosing, int pos)
 | |
|             throws SAXException {
 | |
|         cstart = pos + 1;
 | |
|         maybeErrSlashInEndTag(selfClosing);
 | |
|         stateSave = Tokenizer.DATA;
 | |
|         HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
 | |
|                 : attributes);
 | |
|         if (endTag) {
 | |
|             /*
 | |
|              * When an end tag token is emitted, the content model flag must be
 | |
|              * switched to the PCDATA state.
 | |
|              */
 | |
|             maybeErrAttributesOnEndTag(attrs);
 | |
|             // CPPONLY: if (!viewingXmlSource) {
 | |
|             tokenHandler.endTag(tagName);
 | |
|             // CPPONLY: }
 | |
|             // CPPONLY: if (newAttributesEachTime) {
 | |
|             // CPPONLY:   Portability.delete(attributes);
 | |
|             // CPPONLY:   attributes = null;
 | |
|             // CPPONLY: }
 | |
|         } else {
 | |
|             // CPPONLY: if (viewingXmlSource) {
 | |
|             // CPPONLY:   assert newAttributesEachTime;
 | |
|             // CPPONLY:   Portability.delete(attributes);
 | |
|             // CPPONLY:   attributes = null;
 | |
|             // CPPONLY: } else {
 | |
|             tokenHandler.startTag(tagName, attrs, selfClosing);
 | |
|             // CPPONLY: }
 | |
|         }
 | |
|         tagName = null;
 | |
|         if (newAttributesEachTime) {
 | |
|             attributes = null;
 | |
|         } else {
 | |
|             attributes.clear(mappingLangToXmlLang);
 | |
|         }
 | |
|         /*
 | |
|          * The token handler may have called setStateAndEndTagExpectation
 | |
|          * and changed stateSave since the start of this method.
 | |
|          */
 | |
|         return stateSave;
 | |
|     }
 | |
| 
 | |
|     private void attributeNameComplete() throws SAXException {
 | |
|         attributeName = AttributeName.nameByBuffer(strBuf, strBufLen, interner);
 | |
|         if (attributeName == null) {
 | |
|             // [NOCPP[
 | |
|             attributeName = AttributeName.createAttributeName(
 | |
|                     Portability.newLocalNameFromBuffer(strBuf, strBufLen,
 | |
|                             interner),
 | |
|                     namePolicy != XmlViolationPolicy.ALLOW);
 | |
|             // ]NOCPP]
 | |
|             // CPPONLY:     nonInternedAttributeName.setNameForNonInterned(Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner));
 | |
|             // CPPONLY:     attributeName = nonInternedAttributeName;
 | |
|         }
 | |
|         clearStrBufAfterUse();
 | |
| 
 | |
|         if (attributes == null) {
 | |
|             attributes = new HtmlAttributes(mappingLangToXmlLang);
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * When the user agent leaves the attribute name state (and before
 | |
|          * emitting the tag token, if appropriate), the complete attribute's
 | |
|          * name must be compared to the other attributes on the same token; if
 | |
|          * there is already an attribute on the token with the exact same name,
 | |
|          * then this is a parse error and the new attribute must be dropped,
 | |
|          * along with the value that gets associated with it (if any).
 | |
|          */
 | |
|         if (attributes.contains(attributeName)) {
 | |
|             errDuplicateAttribute();
 | |
|             attributeName = null;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     private void addAttributeWithoutValue() throws SAXException {
 | |
|         noteAttributeWithoutValue();
 | |
| 
 | |
|         // [NOCPP[
 | |
|         if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
 | |
|                 && ElementName.META == tagName) {
 | |
|             err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|         if (attributeName != null) {
 | |
|             // [NOCPP[
 | |
|             if (html4) {
 | |
|                 if (attributeName.isBoolean()) {
 | |
|                     if (html4ModeCompatibleWithXhtml1Schemata) {
 | |
|                         attributes.addAttribute(attributeName,
 | |
|                                 attributeName.getLocal(AttributeName.HTML),
 | |
|                                 xmlnsPolicy);
 | |
|                     } else {
 | |
|                         attributes.addAttribute(attributeName, "", xmlnsPolicy);
 | |
|                     }
 | |
|                 } else {
 | |
|                     if (AttributeName.BORDER != attributeName) {
 | |
|                         err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
 | |
|                         attributes.addAttribute(attributeName, "", xmlnsPolicy);
 | |
|                     }
 | |
|                 }
 | |
|             } else {
 | |
|                 if (AttributeName.SRC == attributeName
 | |
|                         || AttributeName.HREF == attributeName) {
 | |
|                     warn("Attribute \u201C"
 | |
|                             + attributeName.getLocal(AttributeName.HTML)
 | |
|                             + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
 | |
|                 }
 | |
|                 // ]NOCPP]
 | |
|                 attributes.addAttribute(attributeName,
 | |
|                         Portability.newEmptyString()
 | |
|                         // [NOCPP[
 | |
|                         , xmlnsPolicy
 | |
|                 // ]NOCPP]
 | |
|                 // CPPONLY: , attributeLine
 | |
|                 );
 | |
|                 // [NOCPP[
 | |
|             }
 | |
|             // ]NOCPP]
 | |
|             attributeName = null;
 | |
|         } else {
 | |
|             clearStrBufAfterUse();
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     private void addAttributeWithValue() throws SAXException {
 | |
|         // [NOCPP[
 | |
|         if (metaBoundaryPassed && ElementName.META == tagName
 | |
|                 && AttributeName.CHARSET == attributeName) {
 | |
|             err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|         if (attributeName != null) {
 | |
|             String val = strBufToString(); // Ownership transferred to
 | |
|             // HtmlAttributes
 | |
|             // CPPONLY: if (mViewSource) {
 | |
|             // CPPONLY:   mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
 | |
|             // CPPONLY: }
 | |
|             // [NOCPP[
 | |
|             if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
 | |
|                     && attributeName.isCaseFolded()) {
 | |
|                 val = newAsciiLowerCaseStringFromString(val);
 | |
|             }
 | |
|             // ]NOCPP]
 | |
|             attributes.addAttribute(attributeName, val
 | |
|             // [NOCPP[
 | |
|                     , xmlnsPolicy
 | |
|             // ]NOCPP]
 | |
|             // CPPONLY: , attributeLine
 | |
|             );
 | |
|             attributeName = null;
 | |
|         } else {
 | |
|             // We have a duplicate attribute. Explicitly discard its value.
 | |
|             clearStrBufAfterUse();
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     private static String newAsciiLowerCaseStringFromString(String str) {
 | |
|         if (str == null) {
 | |
|             return null;
 | |
|         }
 | |
|         char[] buf = new char[str.length()];
 | |
|         for (int i = 0; i < str.length(); i++) {
 | |
|             char c = str.charAt(i);
 | |
|             if (c >= 'A' && c <= 'Z') {
 | |
|                 c += 0x20;
 | |
|             }
 | |
|             buf[i] = c;
 | |
|         }
 | |
|         return new String(buf);
 | |
|     }
 | |
| 
 | |
|     protected void startErrorReporting() throws SAXException {
 | |
| 
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     public void start() throws SAXException {
 | |
|         initializeWithoutStarting();
 | |
|         tokenHandler.startTokenization(this);
 | |
|         // [NOCPP[
 | |
|         startErrorReporting();
 | |
|         // ]NOCPP]
 | |
|     }
 | |
| 
 | |
|     public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
 | |
|         int state = stateSave;
 | |
|         int returnState = returnStateSave;
 | |
|         char c = '\u0000';
 | |
|         shouldSuspend = false;
 | |
|         lastCR = false;
 | |
| 
 | |
|         int start = buffer.getStart();
 | |
|         int end = buffer.getEnd();
 | |
| 
 | |
|         // In C++, the caller of tokenizeBuffer needs to do this explicitly.
 | |
|         // [NOCPP[
 | |
|         ensureBufferSpace(end - start);
 | |
|         // ]NOCPP]
 | |
| 
 | |
|         /**
 | |
|          * The index of the last <code>char</code> read from <code>buf</code>.
 | |
|          */
 | |
|         int pos = start - 1;
 | |
| 
 | |
|         /**
 | |
|          * The index of the first <code>char</code> in <code>buf</code> that is
 | |
|          * part of a coalesced run of character tokens or
 | |
|          * <code>Integer.MAX_VALUE</code> if there is not a current run being
 | |
|          * coalesced.
 | |
|          */
 | |
|         switch (state) {
 | |
|             case DATA:
 | |
|             case RCDATA:
 | |
|             case SCRIPT_DATA:
 | |
|             case PLAINTEXT:
 | |
|             case RAWTEXT:
 | |
|             case CDATA_SECTION:
 | |
|             case SCRIPT_DATA_ESCAPED:
 | |
|             case SCRIPT_DATA_ESCAPE_START:
 | |
|             case SCRIPT_DATA_ESCAPE_START_DASH:
 | |
|             case SCRIPT_DATA_ESCAPED_DASH:
 | |
|             case SCRIPT_DATA_ESCAPED_DASH_DASH:
 | |
|             case SCRIPT_DATA_DOUBLE_ESCAPE_START:
 | |
|             case SCRIPT_DATA_DOUBLE_ESCAPED:
 | |
|             case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
 | |
|             case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
 | |
|             case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
 | |
|             case SCRIPT_DATA_DOUBLE_ESCAPE_END:
 | |
|                 cstart = start;
 | |
|                 break;
 | |
|             default:
 | |
|                 cstart = Integer.MAX_VALUE;
 | |
|                 break;
 | |
|         }
 | |
| 
 | |
|         /**
 | |
|          * The number of <code>char</code>s in <code>buf</code> that have
 | |
|          * meaning. (The rest of the array is garbage and should not be
 | |
|          * examined.)
 | |
|          */
 | |
|         // CPPONLY: if (mViewSource) {
 | |
|         // CPPONLY:   mViewSource.SetBuffer(buffer);
 | |
|         // CPPONLY:   pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
 | |
|         // CPPONLY:   mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
 | |
|         // CPPONLY: } else {
 | |
|         // CPPONLY:   pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
 | |
|         // CPPONLY: }
 | |
|         // [NOCPP[
 | |
|         pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
 | |
|                 end);
 | |
|         // ]NOCPP]
 | |
|         if (pos == end) {
 | |
|             // exiting due to end of buffer
 | |
|             buffer.setStart(pos);
 | |
|         } else {
 | |
|             buffer.setStart(pos + 1);
 | |
|         }
 | |
|         return lastCR;
 | |
|     }
 | |
| 
 | |
|     // [NOCPP[
 | |
|     private void ensureBufferSpace(int inputLength) throws SAXException {
 | |
|         // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
 | |
|         // Adding to the general worst case instead of only the
 | |
|         // TreeBuilder-exposed worst case to avoid re-introducing a bug when
 | |
|         // unifying the tokenizer and tree builder buffers in the future.
 | |
|         int worstCase = strBufLen + inputLength + charRefBufLen + 2;
 | |
|         tokenHandler.ensureBufferSpace(worstCase);
 | |
|         if (commentPolicy == XmlViolationPolicy.ALTER_INFOSET) {
 | |
|             // When altering infoset, if the comment contents are consecutive
 | |
|             // hyphens, each hyphen generates a space, too. These buffer
 | |
|             // contents never get emitted as characters() to the tokenHandler,
 | |
|             // which is why this calculation happens after the call to
 | |
|             // ensureBufferSpace on tokenHandler.
 | |
|             worstCase *= 2;
 | |
|         }
 | |
|         if (strBuf == null) {
 | |
|             // Add an arbitrary small value to avoid immediate reallocation
 | |
|             // once there are a few characters in the buffer.
 | |
|             strBuf = new char[worstCase + 128];
 | |
|         } else if (worstCase > strBuf.length) {
 | |
|             // HotSpot reportedly allocates memory with 8-byte accuracy, so
 | |
|             // there's no point in trying to do math here to avoid slop.
 | |
|             // Maybe we should add some small constant to worstCase here
 | |
|             // but not doing that without profiling. In C++ with jemalloc,
 | |
|             // the corresponding method should do math to round up here
 | |
|             // to avoid slop.
 | |
|             char[] newBuf = new char[worstCase];
 | |
|             System.arraycopy(strBuf, 0, newBuf, 0, strBufLen);
 | |
|             strBuf = newBuf;
 | |
|         }
 | |
|     }
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     @SuppressWarnings("unused") private int stateLoop(int state, char c,
 | |
|             int pos, @NoLength char[] buf, boolean reconsume, int returnState,
 | |
|             int endPos) throws SAXException {
 | |
|         /*
 | |
|          * Idioms used in this code:
 | |
|          *
 | |
|          *
 | |
|          * Consuming the next input character
 | |
|          *
 | |
|          * To consume the next input character, the code does this: if (++pos ==
 | |
|          * endPos) { break stateloop; } c = checkChar(buf, pos);
 | |
|          *
 | |
|          *
 | |
|          * Staying in a state
 | |
|          *
 | |
|          * When there's a state that the tokenizer may stay in over multiple
 | |
|          * input characters, the state has a wrapper |for(;;)| loop and staying
 | |
|          * in the state continues the loop.
 | |
|          *
 | |
|          *
 | |
|          * Switching to another state
 | |
|          *
 | |
|          * To switch to another state, the code sets the state variable to the
 | |
|          * magic number of the new state. Then it either continues stateloop or
 | |
|          * breaks out of the state's own wrapper loop if the target state is
 | |
|          * right after the current state in source order. (This is a partial
 | |
|          * workaround for Java's lack of goto.)
 | |
|          *
 | |
|          *
 | |
|          * Reconsume support
 | |
|          *
 | |
|          * The spec sometimes says that an input character is reconsumed in
 | |
|          * another state. If a state can ever be entered so that an input
 | |
|          * character can be reconsumed in it, the state's code starts with an
 | |
|          * |if (reconsume)| that sets reconsume to false and skips over the
 | |
|          * normal code for consuming a new character.
 | |
|          *
 | |
|          * To reconsume the current character in another state, the code sets
 | |
|          * |reconsume| to true and then switches to the other state.
 | |
|          *
 | |
|          *
 | |
|          * Emitting character tokens
 | |
|          *
 | |
|          * This method emits character tokens lazily. Whenever a new range of
 | |
|          * character tokens starts, the field cstart must be set to the start
 | |
|          * index of the range. The flushChars() method must be called at the end
 | |
|          * of a range to flush it.
 | |
|          *
 | |
|          *
 | |
|          * U+0000 handling
 | |
|          *
 | |
|          * The various states have to handle the replacement of U+0000 with
 | |
|          * U+FFFD. However, if U+0000 would be reconsumed in another state, the
 | |
|          * replacement doesn't need to happen, because it's handled by the
 | |
|          * reconsuming state.
 | |
|          *
 | |
|          *
 | |
|          * LF handling
 | |
|          *
 | |
|          * Every state needs to increment the line number upon LF unless the LF
 | |
|          * gets reconsumed by another state which increments the line number.
 | |
|          *
 | |
|          *
 | |
|          * CR handling
 | |
|          *
 | |
|          * Every state needs to handle CR unless the CR gets reconsumed and is
 | |
|          * handled by the reconsuming state. The CR needs to be handled as if it
 | |
|          * were and LF, the lastCR field must be set to true and then this
 | |
|          * method must return. The IO driver will then swallow the next
 | |
|          * character if it is an LF to coalesce CRLF.
 | |
|          */
 | |
|         stateloop: for (;;) {
 | |
|             switch (state) {
 | |
|                 case DATA:
 | |
|                     dataloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '&':
 | |
|                                 /*
 | |
|                                  * U+0026 AMPERSAND (&) Switch to the character
 | |
|                                  * reference in data state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
|                                 assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
 | |
|                                 appendCharRefBuf(c);
 | |
|                                 setAdditionalAndRememberAmpersandLocation('\u0000');
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the tag
 | |
|                                  * open state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
| 
 | |
|                                 state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
 | |
|                                 break dataloop; // FALL THROUGH continue
 | |
|                             // stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the input character as a
 | |
|                                  * character token.
 | |
|                                  *
 | |
|                                  * Stay in the data state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case TAG_OPEN:
 | |
|                     tagopenloop: for (;;) {
 | |
|                         /*
 | |
|                          * The behavior of this state depends on the content
 | |
|                          * model flag.
 | |
|                          */
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * If the content model flag is set to the PCDATA state
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         if (c >= 'A' && c <= 'Z') {
 | |
|                             /*
 | |
|                              * U+0041 LATIN CAPITAL LETTER A through to U+005A
 | |
|                              * LATIN CAPITAL LETTER Z Create a new start tag
 | |
|                              * token,
 | |
|                              */
 | |
|                             endTag = false;
 | |
|                             /*
 | |
|                              * set its tag name to the lowercase version of the
 | |
|                              * input character (add 0x0020 to the character's
 | |
|                              * code point),
 | |
|                              */
 | |
|                             clearStrBufBeforeUse();
 | |
|                             appendStrBuf((char) (c + 0x20));
 | |
|                             containsHyphen = false;
 | |
|                             /* then switch to the tag name state. */
 | |
|                             state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
 | |
|                             /*
 | |
|                              * (Don't emit the token yet; further details will
 | |
|                              * be filled in before it is emitted.)
 | |
|                              */
 | |
|                             break tagopenloop;
 | |
|                             // continue stateloop;
 | |
|                         } else if (c >= 'a' && c <= 'z') {
 | |
|                             /*
 | |
|                              * U+0061 LATIN SMALL LETTER A through to U+007A
 | |
|                              * LATIN SMALL LETTER Z Create a new start tag
 | |
|                              * token,
 | |
|                              */
 | |
|                             endTag = false;
 | |
|                             /*
 | |
|                              * set its tag name to the input character,
 | |
|                              */
 | |
|                             clearStrBufBeforeUse();
 | |
|                             appendStrBuf(c);
 | |
|                             containsHyphen = false;
 | |
|                             /* then switch to the tag name state. */
 | |
|                             state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
 | |
|                             /*
 | |
|                              * (Don't emit the token yet; further details will
 | |
|                              * be filled in before it is emitted.)
 | |
|                              */
 | |
|                             break tagopenloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '!':
 | |
|                                 /*
 | |
|                                  * U+0021 EXCLAMATION MARK (!) Switch to the
 | |
|                                  * markup declaration open state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Switch to the close tag
 | |
|                                  * open state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '?':
 | |
|                                 // CPPONLY: if (viewingXmlSource) {
 | |
|                                 // CPPONLY: state = transition(state,
 | |
|                                 // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
 | |
|                                 // CPPONLY: reconsume,
 | |
|                                 // CPPONLY: pos);
 | |
|                                 // CPPONLY: continue stateloop;
 | |
|                                 // CPPONLY: }
 | |
|                                 /*
 | |
|                                  * U+003F QUESTION MARK (?) Parse error.
 | |
|                                  */
 | |
|                                 errProcessingInstruction();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus comment state.
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Parse error.
 | |
|                                  */
 | |
|                                 errLtGt();
 | |
|                                 /*
 | |
|                                  * Emit a U+003C LESS-THAN SIGN character token
 | |
|                                  * and a U+003E GREATER-THAN SIGN character
 | |
|                                  * token.
 | |
|                                  */
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 2);
 | |
|                                 /* Switch to the data state. */
 | |
|                                 cstart = pos + 1;
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Parse error.
 | |
|                                  */
 | |
|                                 errBadCharAfterLt(c);
 | |
|                                 /*
 | |
|                                  * Emit a U+003C LESS-THAN SIGN character token
 | |
|                                  */
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                                 /*
 | |
|                                  * and reconsume the current input character in
 | |
|                                  * the data state.
 | |
|                                  */
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case TAG_NAME:
 | |
|                     tagnameloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 strBufToElementNameString();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the before attribute name state.
 | |
|                                  */
 | |
|                                 strBufToElementNameString();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 break tagnameloop;
 | |
|                             // continue stateloop;
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Switch to the self-closing
 | |
|                                  * start tag state.
 | |
|                                  */
 | |
|                                 strBufToElementNameString();
 | |
|                                 state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * tag token.
 | |
|                                  */
 | |
|                                 strBufToElementNameString();
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 if (c >= 'A' && c <= 'Z') {
 | |
|                                     /*
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                      * U+005A LATIN CAPITAL LETTER Z Append the
 | |
|                                      * lowercase version of the current input
 | |
|                                      * character (add 0x0020 to the character's
 | |
|                                      * code point) to the current tag token's
 | |
|                                      * tag name.
 | |
|                                      */
 | |
|                                     c += 0x20;
 | |
|                                 } else if (c == '-') {
 | |
|                                     containsHyphen = true;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current tag token's tag
 | |
|                                  * name.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the tag name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BEFORE_ATTRIBUTE_NAME:
 | |
|                     beforeattributenameloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the before attribute name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Switch to the self-closing
 | |
|                                  * start tag state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * tag token.
 | |
|                                  */
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case '\"':
 | |
|                             case '\'':
 | |
|                             case '<':
 | |
|                             case '=':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
 | |
|                                  * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
 | |
|                                  * SIGN (=) Parse error.
 | |
|                                  */
 | |
|                                 errBadCharBeforeAttributeNameOrNull(c);
 | |
|                                 /*
 | |
|                                  * Treat it as per the "anything else" entry
 | |
|                                  * below.
 | |
|                                  */
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Start a new attribute in the
 | |
|                                  * current tag token.
 | |
|                                  */
 | |
|                                 if (c >= 'A' && c <= 'Z') {
 | |
|                                     /*
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                      * U+005A LATIN CAPITAL LETTER Z Set that
 | |
|                                      * attribute's name to the lowercase version
 | |
|                                      * of the current input character (add
 | |
|                                      * 0x0020 to the character's code point)
 | |
|                                      */
 | |
|                                     c += 0x20;
 | |
|                                 }
 | |
|                                 // CPPONLY: attributeLine = line;
 | |
|                                 /*
 | |
|                                  * Set that attribute's name to the current
 | |
|                                  * input character,
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * and its value to the empty string.
 | |
|                                  */
 | |
|                                 // Will do later.
 | |
|                                 /*
 | |
|                                  * Switch to the attribute name state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 break beforeattributenameloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case ATTRIBUTE_NAME:
 | |
|                     attributenameloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 attributeNameComplete();
 | |
|                                 state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the after attribute name state.
 | |
|                                  */
 | |
|                                 attributeNameComplete();
 | |
|                                 state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Switch to the self-closing
 | |
|                                  * start tag state.
 | |
|                                  */
 | |
|                                 attributeNameComplete();
 | |
|                                 addAttributeWithoutValue();
 | |
|                                 state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '=':
 | |
|                                 /*
 | |
|                                  * U+003D EQUALS SIGN (=) Switch to the before
 | |
|                                  * attribute value state.
 | |
|                                  */
 | |
|                                 attributeNameComplete();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
 | |
|                                 break attributenameloop;
 | |
|                             // continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * tag token.
 | |
|                                  */
 | |
|                                 attributeNameComplete();
 | |
|                                 addAttributeWithoutValue();
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case '\"':
 | |
|                             case '\'':
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
 | |
|                                  * (') U+003C LESS-THAN SIGN (<) Parse error.
 | |
|                                  */
 | |
|                                 errQuoteOrLtInAttributeNameOrNull(c);
 | |
|                                 /*
 | |
|                                  * Treat it as per the "anything else" entry
 | |
|                                  * below.
 | |
|                                  */
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 if (c >= 'A' && c <= 'Z') {
 | |
|                                     /*
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                      * U+005A LATIN CAPITAL LETTER Z Append the
 | |
|                                      * lowercase version of the current input
 | |
|                                      * character (add 0x0020 to the character's
 | |
|                                      * code point) to the current attribute's
 | |
|                                      * name.
 | |
|                                      */
 | |
|                                     c += 0x20;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current attribute's name.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the attribute name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BEFORE_ATTRIBUTE_VALUE:
 | |
|                     beforeattributevalueloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the before attribute value state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Switch to the
 | |
|                                  * attribute value (double-quoted) state.
 | |
|                                  */
 | |
|                                 // CPPONLY: attributeLine = line;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 break beforeattributevalueloop;
 | |
|                             // continue stateloop;
 | |
|                             case '&':
 | |
|                                 /*
 | |
|                                  * U+0026 AMPERSAND (&) Switch to the attribute
 | |
|                                  * value (unquoted) state and reconsume this
 | |
|                                  * input character.
 | |
|                                  */
 | |
|                                 // CPPONLY: attributeLine = line;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
 | |
|                                 noteUnquotedAttributeValue();
 | |
|                                 continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Switch to the attribute
 | |
|                                  * value (single-quoted) state.
 | |
|                                  */
 | |
|                                 // CPPONLY: attributeLine = line;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Parse error.
 | |
|                                  */
 | |
|                                 errAttributeValueMissing();
 | |
|                                 /*
 | |
|                                  * Emit the current tag token.
 | |
|                                  */
 | |
|                                 addAttributeWithoutValue();
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case '<':
 | |
|                             case '=':
 | |
|                             case '`':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
 | |
|                                  * (=) U+0060 GRAVE ACCENT (`)
 | |
|                                  */
 | |
|                                 errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
 | |
|                                 /*
 | |
|                                  * Treat it as per the "anything else" entry
 | |
|                                  * below.
 | |
|                                  */
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 // [NOCPP[
 | |
|                                 errHtml4NonNameInUnquotedAttribute(c);
 | |
|                                 // ]NOCPP]
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current attribute's value.
 | |
|                                  */
 | |
|                                 // CPPONLY: attributeLine = line;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Switch to the attribute value (unquoted)
 | |
|                                  * state.
 | |
|                                  */
 | |
| 
 | |
|                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
 | |
|                                 noteUnquotedAttributeValue();
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
 | |
|                     attributevaluedoublequotedloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Switch to the after
 | |
|                                  * attribute value (quoted) state.
 | |
|                                  */
 | |
|                                 addAttributeWithValue();
 | |
| 
 | |
|                                 state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
 | |
|                                 break attributevaluedoublequotedloop;
 | |
|                             // continue stateloop;
 | |
|                             case '&':
 | |
|                                 /*
 | |
|                                  * U+0026 AMPERSAND (&) Switch to the character
 | |
|                                  * reference in attribute value state, with the
 | |
|                                  * additional allowed character being U+0022
 | |
|                                  * QUOTATION MARK (").
 | |
|                                  */
 | |
|                                 assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
 | |
|                                 appendCharRefBuf(c);
 | |
|                                 setAdditionalAndRememberAmpersandLocation('\"');
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current attribute's value.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the attribute value (double-quoted)
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case AFTER_ATTRIBUTE_VALUE_QUOTED:
 | |
|                     afterattributevaluequotedloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the before attribute name state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Switch to the self-closing
 | |
|                                  * start tag state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
 | |
|                                 break afterattributevaluequotedloop;
 | |
|                             // continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * tag token.
 | |
|                                  */
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Parse error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenAttributes();
 | |
|                                 /*
 | |
|                                  * Reconsume the character in the before
 | |
|                                  * attribute name state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SELF_CLOSING_START_TAG:
 | |
|                     if (++pos == endPos) {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     c = checkChar(buf, pos);
 | |
|                     /*
 | |
|                      * Consume the next input character:
 | |
|                      */
 | |
|                     switch (c) {
 | |
|                         case '>':
 | |
|                             /*
 | |
|                              * U+003E GREATER-THAN SIGN (>) Set the self-closing
 | |
|                              * flag of the current tag token. Emit the current
 | |
|                              * tag token.
 | |
|                              */
 | |
|                             // [NOCPP[
 | |
|                             errHtml4XmlVoidSyntax();
 | |
|                             // ]NOCPP]
 | |
|                             state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos);
 | |
|                             if (shouldSuspend) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             /*
 | |
|                              * Switch to the data state.
 | |
|                              */
 | |
|                             continue stateloop;
 | |
|                         default:
 | |
|                             /* Anything else Parse error. */
 | |
|                             errSlashNotFollowedByGt();
 | |
|                             /*
 | |
|                              * Reconsume the character in the before attribute
 | |
|                              * name state.
 | |
|                              */
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                     }
 | |
|                 case ATTRIBUTE_VALUE_UNQUOTED:
 | |
|                     for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 addAttributeWithValue();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the before attribute name state.
 | |
|                                  */
 | |
|                                 addAttributeWithValue();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '&':
 | |
|                                 /*
 | |
|                                  * U+0026 AMPERSAND (&) Switch to the character
 | |
|                                  * reference in attribute value state, with the
 | |
|                                  * additional allowed character being U+003E
 | |
|                                  * GREATER-THAN SIGN (>)
 | |
|                                  */
 | |
|                                 assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
 | |
|                                 appendCharRefBuf(c);
 | |
|                                 setAdditionalAndRememberAmpersandLocation('>');
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * tag token.
 | |
|                                  */
 | |
|                                 addAttributeWithValue();
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case '<':
 | |
|                             case '\"':
 | |
|                             case '\'':
 | |
|                             case '=':
 | |
|                             case '`':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
 | |
|                                  * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
 | |
|                                  * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
 | |
|                                  */
 | |
|                                 errUnquotedAttributeValOrNull(c);
 | |
|                                 /*
 | |
|                                  * Treat it as per the "anything else" entry
 | |
|                                  * below.
 | |
|                                  */
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 // [NOCPP]
 | |
|                                 errHtml4NonNameInUnquotedAttribute(c);
 | |
|                                 // ]NOCPP]
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current attribute's value.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the attribute value (unquoted) state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case AFTER_ATTRIBUTE_NAME:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the after attribute name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Switch to the self-closing
 | |
|                                  * start tag state.
 | |
|                                  */
 | |
|                                 addAttributeWithoutValue();
 | |
|                                 state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '=':
 | |
|                                 /*
 | |
|                                  * U+003D EQUALS SIGN (=) Switch to the before
 | |
|                                  * attribute value state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * tag token.
 | |
|                                  */
 | |
|                                 addAttributeWithoutValue();
 | |
|                                 state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                 if (shouldSuspend) {
 | |
|                                     break stateloop;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case '\"':
 | |
|                             case '\'':
 | |
|                             case '<':
 | |
|                                 errQuoteOrLtInAttributeNameOrNull(c);
 | |
|                                 /*
 | |
|                                  * Treat it as per the "anything else" entry
 | |
|                                  * below.
 | |
|                                  */
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 addAttributeWithoutValue();
 | |
|                                 /*
 | |
|                                  * Anything else Start a new attribute in the
 | |
|                                  * current tag token.
 | |
|                                  */
 | |
|                                 if (c >= 'A' && c <= 'Z') {
 | |
|                                     /*
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                      * U+005A LATIN CAPITAL LETTER Z Set that
 | |
|                                      * attribute's name to the lowercase version
 | |
|                                      * of the current input character (add
 | |
|                                      * 0x0020 to the character's code point)
 | |
|                                      */
 | |
|                                     c += 0x20;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Set that attribute's name to the current
 | |
|                                  * input character,
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * and its value to the empty string.
 | |
|                                  */
 | |
|                                 // Will do later.
 | |
|                                 /*
 | |
|                                  * Switch to the attribute name state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                 case MARKUP_DECLARATION_OPEN:
 | |
|                     markupdeclarationopenloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * If the next two characters are both U+002D
 | |
|                          * HYPHEN-MINUS characters (-), consume those two
 | |
|                          * characters, create a comment token whose data is the
 | |
|                          * empty string, and switch to the comment start state.
 | |
|                          *
 | |
|                          * Otherwise, if the next seven characters are an ASCII
 | |
|                          * case-insensitive match for the word "DOCTYPE", then
 | |
|                          * consume those characters and switch to the DOCTYPE
 | |
|                          * state.
 | |
|                          *
 | |
|                          * Otherwise, if the insertion mode is
 | |
|                          * "in foreign content" and the current node is not an
 | |
|                          * element in the HTML namespace and the next seven
 | |
|                          * characters are an case-sensitive match for the string
 | |
|                          * "[CDATA[" (the five uppercase letters "CDATA" with a
 | |
|                          * U+005B LEFT SQUARE BRACKET character before and
 | |
|                          * after), then consume those characters and switch to
 | |
|                          * the CDATA section state.
 | |
|                          *
 | |
|                          * Otherwise, is is a parse error. Switch to the bogus
 | |
|                          * comment state. The next character that is consumed,
 | |
|                          * if any, is the first character that will be in the
 | |
|                          * comment.
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
 | |
|                                 break markupdeclarationopenloop;
 | |
|                             // continue stateloop;
 | |
|                             case 'd':
 | |
|                             case 'D':
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 index = 0;
 | |
|                                 state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '[':
 | |
|                                 if (tokenHandler.cdataSectionAllowed()) {
 | |
|                                     clearStrBufBeforeUse();
 | |
|                                     appendStrBuf(c);
 | |
|                                     index = 0;
 | |
|                                     state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
 | |
|                                     continue stateloop;
 | |
|                                 }
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 errBogusComment();
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case MARKUP_DECLARATION_HYPHEN:
 | |
|                     markupdeclarationhyphenloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case '\u0000':
 | |
|                                 break stateloop;
 | |
|                             case '-':
 | |
|                                 clearStrBufAfterOneHyphen();
 | |
|                                 state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
 | |
|                                 break markupdeclarationhyphenloop;
 | |
|                             // continue stateloop;
 | |
|                             default:
 | |
|                                 errBogusComment();
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case COMMENT_START:
 | |
|                     commentstartloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Comment start state
 | |
|                          *
 | |
|                          *
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Switch to the comment
 | |
|                                  * start dash state.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Parse error.
 | |
|                                  */
 | |
|                                 errPrematureEndOfComment();
 | |
|                                 /* Emit the comment token. */
 | |
|                                 emitComment(0, pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 break commentstartloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the input character to
 | |
|                                  * the comment token's data.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Switch to the comment state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 break commentstartloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case COMMENT:
 | |
|                     commentloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Comment state Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Switch to the comment
 | |
|                                  * end dash state
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
 | |
|                                 break commentloop;
 | |
|                             // continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the input character to
 | |
|                                  * the comment token's data.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the comment state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case COMMENT_END_DASH:
 | |
|                     commentenddashloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Comment end dash state Consume the next input
 | |
|                          * character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Switch to the comment
 | |
|                                  * end state
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
 | |
|                                 break commentenddashloop;
 | |
|                             // continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append a U+002D HYPHEN-MINUS
 | |
|                                  * (-) character and the input character to the
 | |
|                                  * comment token's data.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Switch to the comment state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case COMMENT_END:
 | |
|                     commentendloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Comment end dash state Consume the next input
 | |
|                          * character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the comment
 | |
|                                  * token.
 | |
|                                  */
 | |
|                                 emitComment(2, pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '-':
 | |
|                                 /* U+002D HYPHEN-MINUS (-) Parse error. */
 | |
|                                 /*
 | |
|                                  * Append a U+002D HYPHEN-MINUS (-) character to
 | |
|                                  * the comment token's data.
 | |
|                                  */
 | |
|                                 adjustDoubleHyphenAndAppendToStrBufAndErr(c);
 | |
|                                 /*
 | |
|                                  * Stay in the comment end state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 adjustDoubleHyphenAndAppendToStrBufLineFeed();
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '!':
 | |
|                                 errHyphenHyphenBang();
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Append two U+002D HYPHEN-MINUS (-) characters
 | |
|                                  * and the input character to the comment
 | |
|                                  * token's data.
 | |
|                                  */
 | |
|                                 adjustDoubleHyphenAndAppendToStrBufAndErr(c);
 | |
|                                 /*
 | |
|                                  * Switch to the comment state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                 case COMMENT_END_BANG:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Comment end bang state
 | |
|                          *
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the comment
 | |
|                                  * token.
 | |
|                                  */
 | |
|                                 emitComment(3, pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * Append two U+002D HYPHEN-MINUS (-) characters
 | |
|                                  * and a U+0021 EXCLAMATION MARK (!) character
 | |
|                                  * to the comment token's data.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Switch to the comment end dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append two U+002D HYPHEN-MINUS
 | |
|                                  * (-) characters, a U+0021 EXCLAMATION MARK (!)
 | |
|                                  * character, and the input character to the
 | |
|                                  * comment token's data. Switch to the comment
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Switch to the comment state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                 case COMMENT_START_DASH:
 | |
|                     if (++pos == endPos) {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     c = checkChar(buf, pos);
 | |
|                     /*
 | |
|                      * Comment start dash state
 | |
|                      *
 | |
|                      * Consume the next input character:
 | |
|                      */
 | |
|                     switch (c) {
 | |
|                         case '-':
 | |
|                             /*
 | |
|                              * U+002D HYPHEN-MINUS (-) Switch to the comment end
 | |
|                              * state
 | |
|                              */
 | |
|                             appendStrBuf(c);
 | |
|                             state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         case '>':
 | |
|                             errPrematureEndOfComment();
 | |
|                             /* Emit the comment token. */
 | |
|                             emitComment(1, pos);
 | |
|                             /*
 | |
|                              * Switch to the data state.
 | |
|                              */
 | |
|                             state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         case '\r':
 | |
|                             appendStrBufCarriageReturn();
 | |
|                             state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                             break stateloop;
 | |
|                         case '\n':
 | |
|                             appendStrBufLineFeed();
 | |
|                             state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         case '\u0000':
 | |
|                             c = '\uFFFD';
 | |
|                             // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                         default:
 | |
|                             /*
 | |
|                              * Append a U+002D HYPHEN-MINUS character (-) and
 | |
|                              * the current input character to the comment
 | |
|                              * token's data.
 | |
|                              */
 | |
|                             appendStrBuf(c);
 | |
|                             /*
 | |
|                              * Switch to the comment state.
 | |
|                              */
 | |
|                             state = transition(state, Tokenizer.COMMENT, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                     }
 | |
|                 case CDATA_START:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         if (index < 6) { // CDATA_LSQB.length
 | |
|                             if (c == Tokenizer.CDATA_LSQB[index]) {
 | |
|                                 appendStrBuf(c);
 | |
|                             } else {
 | |
|                                 errBogusComment();
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             index++;
 | |
|                             continue;
 | |
|                         } else {
 | |
|                             clearStrBufAfterUse();
 | |
|                             cstart = pos; // start coalescing
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
 | |
|                             break; // FALL THROUGH continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case CDATA_SECTION:
 | |
|                     cdatasectionloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case ']':
 | |
|                                 flushChars(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
 | |
|                                 break cdatasectionloop; // FALL THROUGH
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case CDATA_RSQB:
 | |
|                     cdatarsqb: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case ']':
 | |
|                                 state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
 | |
|                                 break cdatarsqb;
 | |
|                             default:
 | |
|                                 tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
 | |
|                                         1);
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case CDATA_RSQB_RSQB:
 | |
|                     cdatarsqbrsqb: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case ']':
 | |
|                                 // Saw a third ]. Emit one ] (logically the
 | |
|                                 // first one) and stay in this state to
 | |
|                                 // remember that the last two characters seen
 | |
|                                 // have been ]].
 | |
|                                 tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
 | |
|                                 continue;
 | |
|                             case '>':
 | |
|                                 cstart = pos + 1;
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                 case ATTRIBUTE_VALUE_SINGLE_QUOTED:
 | |
|                     attributevaluesinglequotedloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Switch to the after
 | |
|                                  * attribute value (quoted) state.
 | |
|                                  */
 | |
|                                 addAttributeWithValue();
 | |
| 
 | |
|                                 state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '&':
 | |
|                                 /*
 | |
|                                  * U+0026 AMPERSAND (&) Switch to the character
 | |
|                                  * reference in attribute value state, with the
 | |
|                                  * + additional allowed character being U+0027
 | |
|                                  * APOSTROPHE (').
 | |
|                                  */
 | |
|                                 assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
 | |
|                                 appendCharRefBuf(c);
 | |
|                                 setAdditionalAndRememberAmpersandLocation('\'');
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
 | |
|                                 break attributevaluesinglequotedloop;
 | |
|                             // continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current attribute's value.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the attribute value (double-quoted)
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case CONSUME_CHARACTER_REFERENCE:
 | |
|                     if (++pos == endPos) {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     c = checkChar(buf, pos);
 | |
|                     if (c == '\u0000') {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     /*
 | |
|                      * Unlike the definition is the spec, this state does not
 | |
|                      * return a value and never requires the caller to
 | |
|                      * backtrack. This state takes care of emitting characters
 | |
|                      * or appending to the current attribute value. It also
 | |
|                      * takes care of that in the case when consuming the
 | |
|                      * character reference fails.
 | |
|                      */
 | |
|                     /*
 | |
|                      * This section defines how to consume a character
 | |
|                      * reference. This definition is used when parsing character
 | |
|                      * references in text and in attributes.
 | |
|                      *
 | |
|                      * The behavior depends on the identity of the next
 | |
|                      * character (the one immediately after the U+0026 AMPERSAND
 | |
|                      * character):
 | |
|                      */
 | |
|                     switch (c) {
 | |
|                         case ' ':
 | |
|                         case '\t':
 | |
|                         case '\n':
 | |
|                         case '\r': // we'll reconsume!
 | |
|                         case '\u000C':
 | |
|                         case '<':
 | |
|                         case '&':
 | |
|                             emitOrAppendCharRefBuf(returnState);
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                 cstart = pos;
 | |
|                             }
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, returnState, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         case '#':
 | |
|                             /*
 | |
|                              * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
 | |
|                              * SIGN.
 | |
|                              */
 | |
|                             appendCharRefBuf('#');
 | |
|                             state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         default:
 | |
|                             if (c == additional) {
 | |
|                                 emitOrAppendCharRefBuf(returnState);
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             if (c >= 'a' && c <= 'z') {
 | |
|                                 firstCharKey = c - 'a' + 26;
 | |
|                             } else if (c >= 'A' && c <= 'Z') {
 | |
|                                 firstCharKey = c - 'A';
 | |
|                             } else {
 | |
|                                 // No match
 | |
|                                 /*
 | |
|                                  * If no match can be made, then this is a parse
 | |
|                                  * error.
 | |
|                                  */
 | |
|                                 errNoNamedCharacterMatch();
 | |
|                                 emitOrAppendCharRefBuf(returnState);
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos;
 | |
|                                 }
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             // Didn't fail yet
 | |
|                             appendCharRefBuf(c);
 | |
|                             state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
 | |
|                             // FALL THROUGH continue stateloop;
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case CHARACTER_REFERENCE_HILO_LOOKUP:
 | |
|                     {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         if (c == '\u0000') {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         /*
 | |
|                          * The data structure is as follows:
 | |
|                          *
 | |
|                          * HILO_ACCEL is a two-dimensional int array whose major
 | |
|                          * index corresponds to the second character of the
 | |
|                          * character reference (code point as index) and the
 | |
|                          * minor index corresponds to the first character of the
 | |
|                          * character reference (packed so that A-Z runs from 0
 | |
|                          * to 25 and a-z runs from 26 to 51). This layout makes
 | |
|                          * it easier to use the sparseness of the data structure
 | |
|                          * to omit parts of it: The second dimension of the
 | |
|                          * table is null when no character reference starts with
 | |
|                          * the character corresponding to that row.
 | |
|                          *
 | |
|                          * The int value HILO_ACCEL (by these indeces) is zero
 | |
|                          * if there exists no character reference starting with
 | |
|                          * that two-letter prefix. Otherwise, the value is an
 | |
|                          * int that packs two shorts so that the higher short is
 | |
|                          * the index of the highest character reference name
 | |
|                          * with that prefix in NAMES and the lower short
 | |
|                          * corresponds to the index of the lowest character
 | |
|                          * reference name with that prefix. (It happens that the
 | |
|                          * first two character reference names share their
 | |
|                          * prefix so the packed int cannot be 0 by packing the
 | |
|                          * two shorts.)
 | |
|                          *
 | |
|                          * NAMES is an array of byte arrays where each byte
 | |
|                          * array encodes the name of a character references as
 | |
|                          * ASCII. The names omit the first two letters of the
 | |
|                          * name. (Since storing the first two letters would be
 | |
|                          * redundant with the data contained in HILO_ACCEL.) The
 | |
|                          * entries are lexically sorted.
 | |
|                          *
 | |
|                          * For a given index in NAMES, the same index in VALUES
 | |
|                          * contains the corresponding expansion as an array of
 | |
|                          * two UTF-16 code units (either the character and
 | |
|                          * U+0000 or a suggogate pair).
 | |
|                          */
 | |
|                         int hilo = 0;
 | |
|                         if (c <= 'z') {
 | |
|                             @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c];
 | |
|                             if (row != null) {
 | |
|                                 hilo = row[firstCharKey];
 | |
|                             }
 | |
|                         }
 | |
|                         if (hilo == 0) {
 | |
|                             /*
 | |
|                              * If no match can be made, then this is a parse
 | |
|                              * error.
 | |
|                              */
 | |
|                             errNoNamedCharacterMatch();
 | |
|                             emitOrAppendCharRefBuf(returnState);
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                 cstart = pos;
 | |
|                             }
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, returnState, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         }
 | |
|                         // Didn't fail yet
 | |
|                         appendCharRefBuf(c);
 | |
|                         lo = hilo & 0xFFFF;
 | |
|                         hi = hilo >> 16;
 | |
|                         entCol = -1;
 | |
|                         candidate = -1;
 | |
|                         charRefBufMark = 0;
 | |
|                         state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
 | |
|                         // FALL THROUGH continue stateloop;
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case CHARACTER_REFERENCE_TAIL:
 | |
|                     outer: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         if (c == '\u0000') {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         entCol++;
 | |
|                         /*
 | |
|                          * Consume the maximum number of characters possible,
 | |
|                          * with the consumed characters matching one of the
 | |
|                          * identifiers in the first column of the named
 | |
|                          * character references table (in a case-sensitive
 | |
|                          * manner).
 | |
|                          */
 | |
|                         loloop: for (;;) {
 | |
|                             if (hi < lo) {
 | |
|                                 break outer;
 | |
|                             }
 | |
|                             if (entCol == NamedCharacters.NAMES[lo].length()) {
 | |
|                                 candidate = lo;
 | |
|                                 charRefBufMark = charRefBufLen;
 | |
|                                 lo++;
 | |
|                             } else if (entCol > NamedCharacters.NAMES[lo].length()) {
 | |
|                                 break outer;
 | |
|                             } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
 | |
|                                 lo++;
 | |
|                             } else {
 | |
|                                 break loloop;
 | |
|                             }
 | |
|                         }
 | |
| 
 | |
|                         hiloop: for (;;) {
 | |
|                             if (hi < lo) {
 | |
|                                 break outer;
 | |
|                             }
 | |
|                             if (entCol == NamedCharacters.NAMES[hi].length()) {
 | |
|                                 break hiloop;
 | |
|                             }
 | |
|                             if (entCol > NamedCharacters.NAMES[hi].length()) {
 | |
|                                 break outer;
 | |
|                             } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
 | |
|                                 hi--;
 | |
|                             } else {
 | |
|                                 break hiloop;
 | |
|                             }
 | |
|                         }
 | |
| 
 | |
|                         if (c == ';') {
 | |
|                             // If we see a semicolon, there cannot be a
 | |
|                             // longer match. Break the loop. However, before
 | |
|                             // breaking, take the longest match so far as the
 | |
|                             // candidate, if we are just about to complete a
 | |
|                             // match.
 | |
|                             if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
 | |
|                                 candidate = lo;
 | |
|                                 charRefBufMark = charRefBufLen;
 | |
|                             }
 | |
|                             break outer;
 | |
|                         }
 | |
| 
 | |
|                         if (hi < lo) {
 | |
|                             break outer;
 | |
|                         }
 | |
|                         appendCharRefBuf(c);
 | |
|                         continue;
 | |
|                     }
 | |
| 
 | |
|                     if (candidate == -1) {
 | |
|                         // reconsume deals with CR, LF or nul
 | |
|                         /*
 | |
|                          * If no match can be made, then this is a parse error.
 | |
|                          */
 | |
|                         errNoNamedCharacterMatch();
 | |
|                         emitOrAppendCharRefBuf(returnState);
 | |
|                         if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                             cstart = pos;
 | |
|                         }
 | |
|                         reconsume = true;
 | |
|                         state = transition(state, returnState, reconsume, pos);
 | |
|                         continue stateloop;
 | |
|                     } else {
 | |
|                         // c can't be CR, LF or nul if we got here
 | |
|                         @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
 | |
|                         if (candidateName.length() == 0
 | |
|                                 || candidateName.charAt(candidateName.length() - 1) != ';') {
 | |
|                             /*
 | |
|                              * If the last character matched is not a U+003B
 | |
|                              * SEMICOLON (;), there is a parse error.
 | |
|                              */
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|                                 /*
 | |
|                                  * If the entity is being consumed as part of an
 | |
|                                  * attribute, and the last character matched is
 | |
|                                  * not a U+003B SEMICOLON (;),
 | |
|                                  */
 | |
|                                 char ch;
 | |
|                                 if (charRefBufMark == charRefBufLen) {
 | |
|                                     ch = c;
 | |
|                                 } else {
 | |
|                                     ch = charRefBuf[charRefBufMark];
 | |
|                                 }
 | |
|                                 if (ch == '=' || (ch >= '0' && ch <= '9')
 | |
|                                         || (ch >= 'A' && ch <= 'Z')
 | |
|                                         || (ch >= 'a' && ch <= 'z')) {
 | |
|                                     /*
 | |
|                                      * and the next character is either a U+003D
 | |
|                                      * EQUALS SIGN character (=) or in the range
 | |
|                                      * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A to U+005A
 | |
|                                      * LATIN CAPITAL LETTER Z, or U+0061 LATIN
 | |
|                                      * SMALL LETTER A to U+007A LATIN SMALL
 | |
|                                      * LETTER Z, then, for historical reasons,
 | |
|                                      * all the characters that were matched
 | |
|                                      * after the U+0026 AMPERSAND (&) must be
 | |
|                                      * unconsumed, and nothing is returned.
 | |
|                                      */
 | |
|                                     errNoNamedCharacterMatch();
 | |
|                                     appendCharRefBufToStrBuf();
 | |
|                                     reconsume = true;
 | |
|                                     state = transition(state, returnState, reconsume, pos);
 | |
|                                     continue stateloop;
 | |
|                                 }
 | |
|                             }
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|                                 errUnescapedAmpersandInterpretedAsCharacterReference();
 | |
|                             } else {
 | |
|                                 errNotSemicolonTerminated();
 | |
|                             }
 | |
|                         }
 | |
| 
 | |
|                         /*
 | |
|                          * Otherwise, return a character token for the character
 | |
|                          * corresponding to the entity name (as given by the
 | |
|                          * second column of the named character references
 | |
|                          * table).
 | |
|                          */
 | |
|                         // CPPONLY: completedNamedCharacterReference();
 | |
|                         @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
 | |
|                         if (
 | |
|                         // [NOCPP[
 | |
|                         val.length == 1
 | |
|                         // ]NOCPP]
 | |
|                         // CPPONLY: val[1] == 0
 | |
|                         ) {
 | |
|                             emitOrAppendOne(val, returnState);
 | |
|                         } else {
 | |
|                             emitOrAppendTwo(val, returnState);
 | |
|                         }
 | |
|                         // this is so complicated!
 | |
|                         if (charRefBufMark < charRefBufLen) {
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|                                 appendStrBuf(charRefBuf, charRefBufMark,
 | |
|                                         charRefBufLen - charRefBufMark);
 | |
|                             } else {
 | |
|                                 tokenHandler.characters(charRefBuf, charRefBufMark,
 | |
|                                         charRefBufLen - charRefBufMark);
 | |
|                             }
 | |
|                         }
 | |
|                         // charRefBufLen will be zeroed below!
 | |
| 
 | |
|                         // Check if we broke out early with c being the last
 | |
|                         // character that matched as opposed to being the
 | |
|                         // first one that didn't match. In the case of an
 | |
|                         // early break, the next run on text should start
 | |
|                         // *after* the current character and the current
 | |
|                         // character shouldn't be reconsumed.
 | |
|                         boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
 | |
|                         charRefBufLen = 0;
 | |
|                         if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                             cstart = earlyBreak ? pos + 1 : pos;
 | |
|                         }
 | |
|                         reconsume = !earlyBreak;
 | |
|                         state = transition(state, returnState, reconsume, pos);
 | |
|                         continue stateloop;
 | |
|                         /*
 | |
|                          * If the markup contains I'm ¬it; I tell you, the
 | |
|                          * entity is parsed as "not", as in, I'm ¬it; I tell
 | |
|                          * you. But if the markup was I'm ∉ I tell you,
 | |
|                          * the entity would be parsed as "notin;", resulting in
 | |
|                          * I'm ∉ I tell you.
 | |
|                          */
 | |
|                     }
 | |
|                 case CONSUME_NCR:
 | |
|                     if (++pos == endPos) {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     c = checkChar(buf, pos);
 | |
|                     value = 0;
 | |
|                     seenDigits = false;
 | |
|                     /*
 | |
|                      * The behavior further depends on the character after the
 | |
|                      * U+0023 NUMBER SIGN:
 | |
|                      */
 | |
|                     switch (c) {
 | |
|                         case 'x':
 | |
|                         case 'X':
 | |
| 
 | |
|                             /*
 | |
|                              * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
 | |
|                              * LETTER X Consume the X.
 | |
|                              *
 | |
|                              * Follow the steps below, but using the range of
 | |
|                              * characters U+0030 DIGIT ZERO through to U+0039
 | |
|                              * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
 | |
|                              * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
 | |
|                              * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
 | |
|                              * LETTER F (in other words, 0-9, A-F, a-f).
 | |
|                              *
 | |
|                              * When it comes to interpreting the number,
 | |
|                              * interpret it as a hexadecimal number.
 | |
|                              */
 | |
|                             appendCharRefBuf(c);
 | |
|                             state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         default:
 | |
|                             /*
 | |
|                              * Anything else Follow the steps below, but using
 | |
|                              * the range of characters U+0030 DIGIT ZERO through
 | |
|                              * to U+0039 DIGIT NINE (i.e. just 0-9).
 | |
|                              *
 | |
|                              * When it comes to interpreting the number,
 | |
|                              * interpret it as a decimal number.
 | |
|                              */
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
 | |
|                             // FALL THROUGH continue stateloop;
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DECIMAL_NRC_LOOP:
 | |
|                     decimalloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume as many characters as match the range of
 | |
|                          * characters given above.
 | |
|                          */
 | |
|                         assert value >= 0: "value must not become negative.";
 | |
|                         if (c >= '0' && c <= '9') {
 | |
|                             seenDigits = true;
 | |
|                             // Avoid overflow
 | |
|                             if (value <= 0x10FFFF) {
 | |
|                                 value *= 10;
 | |
|                                 value += c - '0';
 | |
|                             }
 | |
|                             continue;
 | |
|                         } else if (c == ';') {
 | |
|                             if (seenDigits) {
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos + 1;
 | |
|                                 }
 | |
|                                 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
 | |
|                                 // FALL THROUGH continue stateloop;
 | |
|                                 break decimalloop;
 | |
|                             } else {
 | |
|                                 errNoDigitsInNCR();
 | |
|                                 appendCharRefBuf(';');
 | |
|                                 emitOrAppendCharRefBuf(returnState);
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos + 1;
 | |
|                                 }
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                         } else {
 | |
|                             /*
 | |
|                              * If no characters match the range, then don't
 | |
|                              * consume any characters (and unconsume the U+0023
 | |
|                              * NUMBER SIGN character and, if appropriate, the X
 | |
|                              * character). This is a parse error; nothing is
 | |
|                              * returned.
 | |
|                              *
 | |
|                              * Otherwise, if the next character is a U+003B
 | |
|                              * SEMICOLON, consume that too. If it isn't, there
 | |
|                              * is a parse error.
 | |
|                              */
 | |
|                             if (!seenDigits) {
 | |
|                                 errNoDigitsInNCR();
 | |
|                                 emitOrAppendCharRefBuf(returnState);
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos;
 | |
|                                 }
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             } else {
 | |
|                                 errCharRefLacksSemicolon();
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos;
 | |
|                                 }
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
 | |
|                                 // FALL THROUGH continue stateloop;
 | |
|                                 break decimalloop;
 | |
|                             }
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case HANDLE_NCR_VALUE:
 | |
|                     // WARNING previous state sets reconsume
 | |
|                     // We are not going to emit the contents of charRefBuf.
 | |
|                     charRefBufLen = 0;
 | |
|                     // XXX inline this case if the method size can take it
 | |
|                     handleNcrValue(returnState);
 | |
|                     state = transition(state, returnState, reconsume, pos);
 | |
|                     continue stateloop;
 | |
|                 case HEX_NCR_LOOP:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume as many characters as match the range of
 | |
|                          * characters given above.
 | |
|                          */
 | |
|                         assert value >= 0: "value must not become negative.";
 | |
|                         if (c >= '0' && c <= '9') {
 | |
|                             seenDigits = true;
 | |
|                             // Avoid overflow
 | |
|                             if (value <= 0x10FFFF) {
 | |
|                                 value *= 16;
 | |
|                                 value += c - '0';
 | |
|                             }
 | |
|                             continue;
 | |
|                         } else if (c >= 'A' && c <= 'F') {
 | |
|                             seenDigits = true;
 | |
|                             // Avoid overflow
 | |
|                             if (value <= 0x10FFFF) {
 | |
|                                 value *= 16;
 | |
|                                 value += c - 'A' + 10;
 | |
|                             }
 | |
|                             continue;
 | |
|                         } else if (c >= 'a' && c <= 'f') {
 | |
|                             seenDigits = true;
 | |
|                             // Avoid overflow
 | |
|                             if (value <= 0x10FFFF) {
 | |
|                                 value *= 16;
 | |
|                                 value += c - 'a' + 10;
 | |
|                             }
 | |
|                             continue;
 | |
|                         } else if (c == ';') {
 | |
|                             if (seenDigits) {
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos + 1;
 | |
|                                 }
 | |
|                                 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             } else {
 | |
|                                 errNoDigitsInNCR();
 | |
|                                 appendCharRefBuf(';');
 | |
|                                 emitOrAppendCharRefBuf(returnState);
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos + 1;
 | |
|                                 }
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                         } else {
 | |
|                             /*
 | |
|                              * If no characters match the range, then don't
 | |
|                              * consume any characters (and unconsume the U+0023
 | |
|                              * NUMBER SIGN character and, if appropriate, the X
 | |
|                              * character). This is a parse error; nothing is
 | |
|                              * returned.
 | |
|                              *
 | |
|                              * Otherwise, if the next character is a U+003B
 | |
|                              * SEMICOLON, consume that too. If it isn't, there
 | |
|                              * is a parse error.
 | |
|                              */
 | |
|                             if (!seenDigits) {
 | |
|                                 errNoDigitsInNCR();
 | |
|                                 emitOrAppendCharRefBuf(returnState);
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos;
 | |
|                                 }
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             } else {
 | |
|                                 errCharRefLacksSemicolon();
 | |
|                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
 | |
|                                     cstart = pos;
 | |
|                                 }
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                         }
 | |
|                     }
 | |
|                 case PLAINTEXT:
 | |
|                     plaintextloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '\u0000':
 | |
|                                 emitPlaintextReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Stay in the
 | |
|                                  * RAWTEXT state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case CLOSE_TAG_OPEN:
 | |
|                     if (++pos == endPos) {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     c = checkChar(buf, pos);
 | |
|                     /*
 | |
|                      * Otherwise, if the content model flag is set to the PCDATA
 | |
|                      * state, or if the next few characters do match that tag
 | |
|                      * name, consume the next input character:
 | |
|                      */
 | |
|                     switch (c) {
 | |
|                         case '>':
 | |
|                             /* U+003E GREATER-THAN SIGN (>) Parse error. */
 | |
|                             errLtSlashGt();
 | |
|                             /*
 | |
|                              * Switch to the data state.
 | |
|                              */
 | |
|                             cstart = pos + 1;
 | |
|                             state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         case '\r':
 | |
|                             silentCarriageReturn();
 | |
|                             /* Anything else Parse error. */
 | |
|                             errGarbageAfterLtSlash();
 | |
|                             /*
 | |
|                              * Switch to the bogus comment state.
 | |
|                              */
 | |
|                             clearStrBufBeforeUse();
 | |
|                             appendStrBuf('\n');
 | |
|                             state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                             break stateloop;
 | |
|                         case '\n':
 | |
|                             silentLineFeed();
 | |
|                             /* Anything else Parse error. */
 | |
|                             errGarbageAfterLtSlash();
 | |
|                             /*
 | |
|                              * Switch to the bogus comment state.
 | |
|                              */
 | |
|                             clearStrBufBeforeUse();
 | |
|                             appendStrBuf(c);
 | |
|                             state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         case '\u0000':
 | |
|                             c = '\uFFFD';
 | |
|                             // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                         default:
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 c += 0x20;
 | |
|                             }
 | |
|                             if (c >= 'a' && c <= 'z') {
 | |
|                                 /*
 | |
|                                  * U+0061 LATIN SMALL LETTER A through to U+007A
 | |
|                                  * LATIN SMALL LETTER Z Create a new end tag
 | |
|                                  * token,
 | |
|                                  */
 | |
|                                 endTag = true;
 | |
|                                 /*
 | |
|                                  * set its tag name to the input character,
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 containsHyphen = false;
 | |
|                                 /*
 | |
|                                  * then switch to the tag name state. (Don't
 | |
|                                  * emit the token yet; further details will be
 | |
|                                  * filled in before it is emitted.)
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             } else {
 | |
|                                 /* Anything else Parse error. */
 | |
|                                 errGarbageAfterLtSlash();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus comment state.
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                     }
 | |
|                 case RCDATA:
 | |
|                     rcdataloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '&':
 | |
|                                 /*
 | |
|                                  * U+0026 AMPERSAND (&) Switch to the character
 | |
|                                  * reference in RCDATA state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
|                                 assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
 | |
|                                 appendCharRefBuf(c);
 | |
|                                 setAdditionalAndRememberAmpersandLocation('\u0000');
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the
 | |
|                                  * RCDATA less-than sign state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
| 
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Emit the current input character as a
 | |
|                                  * character token. Stay in the RCDATA state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case RAWTEXT:
 | |
|                     rawtextloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the
 | |
|                                  * RAWTEXT less-than sign state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
| 
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 break rawtextloop;
 | |
|                             // FALL THRU continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Emit the current input character as a
 | |
|                                  * character token. Stay in the RAWTEXT state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case RAWTEXT_RCDATA_LESS_THAN_SIGN:
 | |
|                     rawtextrcdatalessthansignloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Set the temporary buffer
 | |
|                                  * to the empty string. Switch to the script
 | |
|                                  * data end tag open state.
 | |
|                                  */
 | |
|                                 index = 0;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
 | |
|                                 break rawtextrcdatalessthansignloop;
 | |
|                             // FALL THRU continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Otherwise, emit a U+003C LESS-THAN SIGN
 | |
|                                  * character token
 | |
|                                  */
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                                 /*
 | |
|                                  * and reconsume the current input character in
 | |
|                                  * the data state.
 | |
|                                  */
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case NON_DATA_END_TAG_NAME:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * ASSERT! when entering this state, set index to 0 and
 | |
|                          * call clearStrBufBeforeUse() assert (contentModelElement !=
 | |
|                          * null); Let's implement the above without lookahead.
 | |
|                          * strBuf is the 'temporary buffer'.
 | |
|                          */
 | |
|                         if (index < endTagExpectationAsArray.length) {
 | |
|                             char e = endTagExpectationAsArray[index];
 | |
|                             char folded = c;
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 folded += 0x20;
 | |
|                             }
 | |
|                             if (folded != e) {
 | |
|                                 // [NOCPP[
 | |
|                                 errHtml4LtSlashInRcdata(folded);
 | |
|                                 // ]NOCPP]
 | |
|                                 tokenHandler.characters(Tokenizer.LT_SOLIDUS,
 | |
|                                         0, 2);
 | |
|                                 emitStrBuf();
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, returnState, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             appendStrBuf(c);
 | |
|                             index++;
 | |
|                             continue;
 | |
|                         } else {
 | |
|                             endTag = true;
 | |
|                             // XXX replace contentModelElement with different
 | |
|                             // type
 | |
|                             tagName = endTagExpectation;
 | |
|                             switch (c) {
 | |
|                                 case '\r':
 | |
|                                     silentCarriageReturn();
 | |
|                                     clearStrBufAfterUse(); // strBuf not used
 | |
|                                     state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                     break stateloop;
 | |
|                                 case '\n':
 | |
|                                     silentLineFeed();
 | |
|                                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                                 case ' ':
 | |
|                                 case '\t':
 | |
|                                 case '\u000C':
 | |
|                                     /*
 | |
|                                      * U+0009 CHARACTER TABULATION U+000A LINE
 | |
|                                      * FEED (LF) U+000C FORM FEED (FF) U+0020
 | |
|                                      * SPACE If the current end tag token is an
 | |
|                                      * appropriate end tag token, then switch to
 | |
|                                      * the before attribute name state.
 | |
|                                      */
 | |
|                                     clearStrBufAfterUse(); // strBuf not used
 | |
|                                     state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
 | |
|                                     continue stateloop;
 | |
|                                 case '/':
 | |
|                                     /*
 | |
|                                      * U+002F SOLIDUS (/) If the current end tag
 | |
|                                      * token is an appropriate end tag token,
 | |
|                                      * then switch to the self-closing start tag
 | |
|                                      * state.
 | |
|                                      */
 | |
|                                     clearStrBufAfterUse(); // strBuf not used
 | |
|                                     state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
 | |
|                                     continue stateloop;
 | |
|                                 case '>':
 | |
|                                     /*
 | |
|                                      * U+003E GREATER-THAN SIGN (>) If the
 | |
|                                      * current end tag token is an appropriate
 | |
|                                      * end tag token, then emit the current tag
 | |
|                                      * token and switch to the data state.
 | |
|                                      */
 | |
|                                     clearStrBufAfterUse(); // strBuf not used
 | |
|                                     state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
 | |
|                                     if (shouldSuspend) {
 | |
|                                         break stateloop;
 | |
|                                     }
 | |
|                                     continue stateloop;
 | |
|                                 default:
 | |
|                                     /*
 | |
|                                      * Emit a U+003C LESS-THAN SIGN character
 | |
|                                      * token, a U+002F SOLIDUS character token,
 | |
|                                      * a character token for each of the
 | |
|                                      * characters in the temporary buffer (in
 | |
|                                      * the order they were added to the buffer),
 | |
|                                      * and reconsume the current input character
 | |
|                                      * in the RAWTEXT state.
 | |
|                                      */
 | |
|                                     // [NOCPP[
 | |
|                                     errWarnLtSlashInRcdata();
 | |
|                                     // ]NOCPP]
 | |
|                                     tokenHandler.characters(
 | |
|                                             Tokenizer.LT_SOLIDUS, 0, 2);
 | |
|                                     emitStrBuf();
 | |
|                                     if (c == '\u0000') {
 | |
|                                         emitReplacementCharacter(buf, pos);
 | |
|                                     } else {
 | |
|                                         cstart = pos; // don't drop the
 | |
|                                         // character
 | |
|                                     }
 | |
|                                     state = transition(state, returnState, reconsume, pos);
 | |
|                                     continue stateloop;
 | |
|                             }
 | |
|                         }
 | |
|                     }
 | |
|                     // BEGIN HOTSPOT WORKAROUND
 | |
|                 case BOGUS_COMMENT:
 | |
|                     boguscommentloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume every character up to and including the first
 | |
|                          * U+003E GREATER-THAN SIGN character (>) or the end of
 | |
|                          * the file (EOF), whichever comes first. Emit a comment
 | |
|                          * token whose data is the concatenation of all the
 | |
|                          * characters starting from and including the character
 | |
|                          * that caused the state machine to switch into the
 | |
|                          * bogus comment state, up to and including the
 | |
|                          * character immediately before the last consumed
 | |
|                          * character (i.e. up to the character just before the
 | |
|                          * U+003E or EOF character). (If the comment was started
 | |
|                          * by the end of the file (EOF), the token is empty.)
 | |
|                          *
 | |
|                          * Switch to the data state.
 | |
|                          *
 | |
|                          * If the end of the file was reached, reconsume the EOF
 | |
|                          * character.
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '>':
 | |
|                                 emitComment(0, pos);
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '-':
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
 | |
|                                 break boguscommentloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 appendStrBuf(c);
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BOGUS_COMMENT_HYPHEN:
 | |
|                     boguscommenthyphenloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case '>':
 | |
|                                 // [NOCPP[
 | |
|                                 maybeAppendSpaceToBogusComment();
 | |
|                                 // ]NOCPP]
 | |
|                                 emitComment(0, pos);
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '-':
 | |
|                                 appendSecondHyphenToBogusComment();
 | |
|                                 continue boguscommenthyphenloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 appendStrBuf(c);
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                 case SCRIPT_DATA:
 | |
|                     scriptdataloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the
 | |
|                                  * script data less-than sign state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
|                                 returnState = state;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 break scriptdataloop; // FALL THRU continue
 | |
|                             // stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Stay in the
 | |
|                                  * script data state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_LESS_THAN_SIGN:
 | |
|                     scriptdatalessthansignloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Set the temporary buffer
 | |
|                                  * to the empty string. Switch to the script
 | |
|                                  * data end tag open state.
 | |
|                                  */
 | |
|                                 index = 0;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '!':
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                                 cstart = pos;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
 | |
|                                 break scriptdatalessthansignloop; // FALL THRU
 | |
|                             // continue
 | |
|                             // stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Otherwise, emit a U+003C LESS-THAN SIGN
 | |
|                                  * character token
 | |
|                                  */
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                                 /*
 | |
|                                  * and reconsume the current input character in
 | |
|                                  * the data state.
 | |
|                                  */
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_ESCAPE_START:
 | |
|                     scriptdataescapestartloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Switch to the
 | |
|                                  * script data escape start dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
 | |
|                                 break scriptdataescapestartloop; // FALL THRU
 | |
|                             // continue
 | |
|                             // stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Reconsume the current input
 | |
|                                  * character in the script data state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_ESCAPE_START_DASH:
 | |
|                     scriptdataescapestartdashloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Switch to the
 | |
|                                  * script data escaped dash dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
 | |
|                                 break scriptdataescapestartdashloop;
 | |
|                             // continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Reconsume the current input
 | |
|                                  * character in the script data state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_ESCAPED_DASH_DASH:
 | |
|                     scriptdataescapeddashdashloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Stay in the
 | |
|                                  * script data escaped dash dash state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the
 | |
|                                  * script data escaped less-than sign state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit a U+003E
 | |
|                                  * GREATER-THAN SIGN character token. Switch to
 | |
|                                  * the script data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 break scriptdataescapeddashdashloop;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Switch to the
 | |
|                                  * script data escaped state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 break scriptdataescapeddashdashloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_ESCAPED:
 | |
|                     scriptdataescapedloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Switch to the
 | |
|                                  * script data escaped dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
 | |
|                                 break scriptdataescapedloop; // FALL THRU
 | |
|                             // continue
 | |
|                             // stateloop;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the
 | |
|                                  * script data escaped less-than sign state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Stay in the
 | |
|                                  * script data escaped state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_ESCAPED_DASH:
 | |
|                     scriptdataescapeddashloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Switch to the
 | |
|                                  * script data escaped dash dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Switch to the
 | |
|                                  * script data escaped less-than sign state.
 | |
|                                  */
 | |
|                                 flushChars(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 break scriptdataescapeddashloop;
 | |
|                             // continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Switch to the
 | |
|                                  * script data escaped state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
 | |
|                     scriptdataescapedlessthanloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Set the temporary buffer
 | |
|                                  * to the empty string. Switch to the script
 | |
|                                  * data escaped end tag open state.
 | |
|                                  */
 | |
|                                 index = 0;
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
 | |
|                                 state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case 'S':
 | |
|                             case 's':
 | |
|                                 /*
 | |
|                                  * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                  * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
 | |
|                                  * LESS-THAN SIGN character token and the
 | |
|                                  * current input character as a character token.
 | |
|                                  */
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                                 cstart = pos;
 | |
|                                 index = 1;
 | |
|                                 /*
 | |
|                                  * Set the temporary buffer to the empty string.
 | |
|                                  * Append the lowercase version of the current
 | |
|                                  * input character (add 0x0020 to the
 | |
|                                  * character's code point) to the temporary
 | |
|                                  * buffer. Switch to the script data double
 | |
|                                  * escape start state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
 | |
|                                 break scriptdataescapedlessthanloop;
 | |
|                             // continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit a U+003C LESS-THAN SIGN
 | |
|                                  * character token and reconsume the current
 | |
|                                  * input character in the script data escaped
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                                 cstart = pos;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_DOUBLE_ESCAPE_START:
 | |
|                     scriptdatadoubleescapestartloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         assert index > 0;
 | |
|                         if (index < 6) { // SCRIPT_ARR.length
 | |
|                             char folded = c;
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 folded += 0x20;
 | |
|                             }
 | |
|                             if (folded != Tokenizer.SCRIPT_ARR[index]) {
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             index++;
 | |
|                             continue;
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                             case '/':
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
 | |
|                                  * (>) Emit the current input character as a
 | |
|                                  * character token. If the temporary buffer is
 | |
|                                  * the string "script", then switch to the
 | |
|                                  * script data double escaped state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 break scriptdatadoubleescapestartloop;
 | |
|                             // continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Reconsume the current input
 | |
|                                  * character in the script data escaped state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_DOUBLE_ESCAPED:
 | |
|                     scriptdatadoubleescapedloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Switch to the
 | |
|                                  * script data double escaped dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
 | |
|                                 break scriptdatadoubleescapedloop; // FALL THRU
 | |
|                             // continue
 | |
|                             // stateloop;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Emit a U+003C
 | |
|                                  * LESS-THAN SIGN character token. Switch to the
 | |
|                                  * script data double escaped less-than sign
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 continue;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Stay in the
 | |
|                                  * script data double escaped state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
 | |
|                     scriptdatadoubleescapeddashloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Switch to the
 | |
|                                  * script data double escaped dash dash state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
 | |
|                                 break scriptdatadoubleescapeddashloop;
 | |
|                             // continue stateloop;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Emit a U+003C
 | |
|                                  * LESS-THAN SIGN character token. Switch to the
 | |
|                                  * script data double escaped less-than sign
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Switch to the
 | |
|                                  * script data double escaped state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
 | |
|                     scriptdatadoubleescapeddashdashloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '-':
 | |
|                                 /*
 | |
|                                  * U+002D HYPHEN-MINUS (-) Emit a U+002D
 | |
|                                  * HYPHEN-MINUS character token. Stay in the
 | |
|                                  * script data double escaped dash dash state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '<':
 | |
|                                 /*
 | |
|                                  * U+003C LESS-THAN SIGN (<) Emit a U+003C
 | |
|                                  * LESS-THAN SIGN character token. Switch to the
 | |
|                                  * script data double escaped less-than sign
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
 | |
|                                 break scriptdatadoubleescapeddashdashloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit a U+003E
 | |
|                                  * GREATER-THAN SIGN character token. Switch to
 | |
|                                  * the script data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 emitReplacementCharacter(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Emit the current input
 | |
|                                  * character as a character token. Switch to the
 | |
|                                  * script data double escaped state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
 | |
|                     scriptdatadoubleescapedlessthanloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '/':
 | |
|                                 /*
 | |
|                                  * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
 | |
|                                  * character token. Set the temporary buffer to
 | |
|                                  * the empty string. Switch to the script data
 | |
|                                  * double escape end state.
 | |
|                                  */
 | |
|                                 index = 0;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
 | |
|                                 break scriptdatadoubleescapedlessthanloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Reconsume the current input
 | |
|                                  * character in the script data double escaped
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case SCRIPT_DATA_DOUBLE_ESCAPE_END:
 | |
|                     scriptdatadoubleescapeendloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         if (index < 6) { // SCRIPT_ARR.length
 | |
|                             char folded = c;
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 folded += 0x20;
 | |
|                             }
 | |
|                             if (folded != Tokenizer.SCRIPT_ARR[index]) {
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             index++;
 | |
|                             continue;
 | |
|                         }
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 emitCarriageReturn(buf, pos);
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                             case '/':
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
 | |
|                                  * (>) Emit the current input character as a
 | |
|                                  * character token. If the temporary buffer is
 | |
|                                  * the string "script", then switch to the
 | |
|                                  * script data escaped state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Reconsume the current input character in the
 | |
|                                  * script data double escaped state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                 case MARKUP_DECLARATION_OCTYPE:
 | |
|                     markupdeclarationdoctypeloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         if (index < 6) { // OCTYPE.length
 | |
|                             char folded = c;
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 folded += 0x20;
 | |
|                             }
 | |
|                             if (folded == Tokenizer.OCTYPE[index]) {
 | |
|                                 appendStrBuf(c);
 | |
|                             } else {
 | |
|                                 errBogusComment();
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             index++;
 | |
|                             continue;
 | |
|                         } else {
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
 | |
|                             break markupdeclarationdoctypeloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DOCTYPE:
 | |
|                     doctypeloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         initDoctypeFields();
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the before DOCTYPE name state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
 | |
|                                 break doctypeloop;
 | |
|                             // continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Parse error.
 | |
|                                  */
 | |
|                                 errMissingSpaceBeforeDoctypeName();
 | |
|                                 /*
 | |
|                                  * Reconsume the current character in the before
 | |
|                                  * DOCTYPE name state.
 | |
|                                  */
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
 | |
|                                 break doctypeloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BEFORE_DOCTYPE_NAME:
 | |
|                     beforedoctypenameloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the before DOCTYPE name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Parse error.
 | |
|                                  */
 | |
|                                 errNamelessDoctype();
 | |
|                                 /*
 | |
|                                  * Create a new DOCTYPE token. Set its
 | |
|                                  * force-quirks flag to on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit the token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 if (c >= 'A' && c <= 'Z') {
 | |
|                                     /*
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                      * U+005A LATIN CAPITAL LETTER Z Create a
 | |
|                                      * new DOCTYPE token. Set the token's name
 | |
|                                      * to the lowercase version of the input
 | |
|                                      * character (add 0x0020 to the character's
 | |
|                                      * code point).
 | |
|                                      */
 | |
|                                     c += 0x20;
 | |
|                                 }
 | |
|                                 /* Anything else Create a new DOCTYPE token. */
 | |
|                                 /*
 | |
|                                  * Set the token's name name to the current
 | |
|                                  * input character.
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Switch to the DOCTYPE name state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
 | |
|                                 break beforedoctypenameloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DOCTYPE_NAME:
 | |
|                     doctypenameloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 strBufToDoctypeName();
 | |
|                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the after DOCTYPE name state.
 | |
|                                  */
 | |
|                                 strBufToDoctypeName();
 | |
|                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
 | |
|                                 break doctypenameloop;
 | |
|                             // continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * DOCTYPE token.
 | |
|                                  */
 | |
|                                 strBufToDoctypeName();
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * U+0041 LATIN CAPITAL LETTER A through to
 | |
|                                  * U+005A LATIN CAPITAL LETTER Z Append the
 | |
|                                  * lowercase version of the input character (add
 | |
|                                  * 0x0020 to the character's code point) to the
 | |
|                                  * current DOCTYPE token's name.
 | |
|                                  */
 | |
|                                 if (c >= 'A' && c <= 'Z') {
 | |
|                                     c += 0x0020;
 | |
|                                 }
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current DOCTYPE token's
 | |
|                                  * name.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the DOCTYPE name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case AFTER_DOCTYPE_NAME:
 | |
|                     afterdoctypenameloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the after DOCTYPE name state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case 'p':
 | |
|                             case 'P':
 | |
|                                 index = 0;
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
 | |
|                                 break afterdoctypenameloop;
 | |
|                             // continue stateloop;
 | |
|                             case 's':
 | |
|                             case 'S':
 | |
|                                 index = 0;
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Otherwise, this is the parse error.
 | |
|                                  */
 | |
|                                 bogusDoctype();
 | |
| 
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DOCTYPE_UBLIC:
 | |
|                     doctypeublicloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * If the six characters starting from the current input
 | |
|                          * character are an ASCII case-insensitive match for the
 | |
|                          * word "PUBLIC", then consume those characters and
 | |
|                          * switch to the before DOCTYPE public identifier state.
 | |
|                          */
 | |
|                         if (index < 5) { // UBLIC.length
 | |
|                             char folded = c;
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 folded += 0x20;
 | |
|                             }
 | |
|                             if (folded != Tokenizer.UBLIC[index]) {
 | |
|                                 bogusDoctype();
 | |
|                                 // forceQuirks = true;
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             index++;
 | |
|                             continue;
 | |
|                         } else {
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
 | |
|                             break doctypeublicloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case AFTER_DOCTYPE_PUBLIC_KEYWORD:
 | |
|                     afterdoctypepublickeywordloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the before DOCTYPE public
 | |
|                                  * identifier state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
 | |
|                                 break afterdoctypepublickeywordloop;
 | |
|                             // FALL THROUGH continue stateloop
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Parse Error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's public identifier to
 | |
|                                  * the empty string (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE public identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Parse Error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's public identifier to
 | |
|                                  * the empty string (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE public identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
 | |
|                                 errExpectedPublicId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
 | |
|                     beforedoctypepublicidentifierloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the before DOCTYPE public identifier
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
 | |
|                                  * token's public identifier to the empty string
 | |
|                                  * (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE public identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 break beforedoctypepublicidentifierloop;
 | |
|                             // continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
 | |
|                                  * public identifier to the empty string (not
 | |
|                                  * missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE public identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
 | |
|                                 errExpectedPublicId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
 | |
|                     doctypepublicidentifierdoublequotedloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Switch to the after
 | |
|                                  * DOCTYPE public identifier state.
 | |
|                                  */
 | |
|                                 publicIdentifier = strBufToString();
 | |
|                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
 | |
|                                 break doctypepublicidentifierdoublequotedloop;
 | |
|                             // continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Parse error.
 | |
|                                  */
 | |
|                                 errGtInPublicId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 publicIdentifier = strBufToString();
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current DOCTYPE token's
 | |
|                                  * public identifier.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the DOCTYPE public identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
 | |
|                     afterdoctypepublicidentifierloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the between DOCTYPE public and
 | |
|                                  * system identifiers state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
 | |
|                                 break afterdoctypepublicidentifierloop;
 | |
|                             // continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Parse error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenPublicAndSystemIds();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's system identifier to
 | |
|                                  * the empty string (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE system identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Parse error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenPublicAndSystemIds();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's system identifier to
 | |
|                                  * the empty string (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE system identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
 | |
|                     betweendoctypepublicandsystemidentifiersloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the between DOCTYPE public and system
 | |
|                                  * identifiers state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
 | |
|                                  * token's system identifier to the empty string
 | |
|                                  * (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE system identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 break betweendoctypepublicandsystemidentifiersloop;
 | |
|                             // continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
 | |
|                                  * system identifier to the empty string (not
 | |
|                                  * missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE system identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
 | |
|                     doctypesystemidentifierdoublequotedloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Switch to the after
 | |
|                                  * DOCTYPE system identifier state.
 | |
|                                  */
 | |
|                                 systemIdentifier = strBufToString();
 | |
|                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Parse error.
 | |
|                                  */
 | |
|                                 errGtInSystemId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 systemIdentifier = strBufToString();
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current DOCTYPE token's
 | |
|                                  * system identifier.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the DOCTYPE system identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
 | |
|                     afterdoctypesystemidentifierloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the after DOCTYPE system identifier state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit the current
 | |
|                                  * DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state. (This does
 | |
|                                  * not set the DOCTYPE token's force-quirks flag
 | |
|                                  * to on.)
 | |
|                                  */
 | |
|                                 bogusDoctypeWithoutQuirks();
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 break afterdoctypesystemidentifierloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BOGUS_DOCTYPE:
 | |
|                     for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '>':
 | |
|                                 /*
 | |
|                                  * U+003E GREATER-THAN SIGN (>) Emit that
 | |
|                                  * DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Stay in the bogus DOCTYPE
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case DOCTYPE_YSTEM:
 | |
|                     doctypeystemloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Otherwise, if the six characters starting from the
 | |
|                          * current input character are an ASCII case-insensitive
 | |
|                          * match for the word "SYSTEM", then consume those
 | |
|                          * characters and switch to the before DOCTYPE system
 | |
|                          * identifier state.
 | |
|                          */
 | |
|                         if (index < 5) { // YSTEM.length
 | |
|                             char folded = c;
 | |
|                             if (c >= 'A' && c <= 'Z') {
 | |
|                                 folded += 0x20;
 | |
|                             }
 | |
|                             if (folded != Tokenizer.YSTEM[index]) {
 | |
|                                 bogusDoctype();
 | |
|                                 reconsume = true;
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             }
 | |
|                             index++;
 | |
|                             continue stateloop;
 | |
|                         } else {
 | |
|                             reconsume = true;
 | |
|                             state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
 | |
|                             break doctypeystemloop;
 | |
|                             // continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case AFTER_DOCTYPE_SYSTEM_KEYWORD:
 | |
|                     afterdoctypesystemkeywordloop: for (;;) {
 | |
|                         if (reconsume) {
 | |
|                             reconsume = false;
 | |
|                         } else {
 | |
|                             if (++pos == endPos) {
 | |
|                                 break stateloop;
 | |
|                             }
 | |
|                             c = checkChar(buf, pos);
 | |
|                         }
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE
 | |
|                                  * Switch to the before DOCTYPE public
 | |
|                                  * identifier state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
 | |
|                                 break afterdoctypesystemkeywordloop;
 | |
|                             // FALL THROUGH continue stateloop
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Parse Error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's system identifier to
 | |
|                                  * the empty string (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE public identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Parse Error.
 | |
|                                  */
 | |
|                                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's public identifier to
 | |
|                                  * the empty string (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE public identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
 | |
|                                 errExpectedPublicId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
 | |
|                     beforedoctypesystemidentifierloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\r':
 | |
|                                 silentCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 silentLineFeed();
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             case ' ':
 | |
|                             case '\t':
 | |
|                             case '\u000C':
 | |
|                                 /*
 | |
|                                  * U+0009 CHARACTER TABULATION U+000A LINE FEED
 | |
|                                  * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
 | |
|                                  * in the before DOCTYPE system identifier
 | |
|                                  * state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                             case '"':
 | |
|                                 /*
 | |
|                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
 | |
|                                  * token's system identifier to the empty string
 | |
|                                  * (not missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE system identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
 | |
|                                  * system identifier to the empty string (not
 | |
|                                  * missing),
 | |
|                                  */
 | |
|                                 clearStrBufBeforeUse();
 | |
|                                 /*
 | |
|                                  * then switch to the DOCTYPE system identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
 | |
|                                 break beforedoctypesystemidentifierloop;
 | |
|                             // continue stateloop;
 | |
|                             case '>':
 | |
|                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
 | |
|                                 errExpectedSystemId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             default:
 | |
|                                 bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 // done by bogusDoctype();
 | |
|                                 /*
 | |
|                                  * Switch to the bogus DOCTYPE state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Switch to the after
 | |
|                                  * DOCTYPE system identifier state.
 | |
|                                  */
 | |
|                                 systemIdentifier = strBufToString();
 | |
|                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 errGtInSystemId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 systemIdentifier = strBufToString();
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current DOCTYPE token's
 | |
|                                  * system identifier.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the DOCTYPE system identifier
 | |
|                                  * (double-quoted) state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
 | |
|                     for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         /*
 | |
|                          * Consume the next input character:
 | |
|                          */
 | |
|                         switch (c) {
 | |
|                             case '\'':
 | |
|                                 /*
 | |
|                                  * U+0027 APOSTROPHE (') Switch to the after
 | |
|                                  * DOCTYPE public identifier state.
 | |
|                                  */
 | |
|                                 publicIdentifier = strBufToString();
 | |
|                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '>':
 | |
|                                 errGtInPublicId();
 | |
|                                 /*
 | |
|                                  * Set the DOCTYPE token's force-quirks flag to
 | |
|                                  * on.
 | |
|                                  */
 | |
|                                 forceQuirks = true;
 | |
|                                 /*
 | |
|                                  * Emit that DOCTYPE token.
 | |
|                                  */
 | |
|                                 publicIdentifier = strBufToString();
 | |
|                                 emitDoctypeToken(pos);
 | |
|                                 /*
 | |
|                                  * Switch to the data state.
 | |
|                                  */
 | |
|                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
 | |
|                                 continue stateloop;
 | |
|                             case '\r':
 | |
|                                 appendStrBufCarriageReturn();
 | |
|                                 break stateloop;
 | |
|                             case '\n':
 | |
|                                 appendStrBufLineFeed();
 | |
|                                 continue;
 | |
|                             case '\u0000':
 | |
|                                 c = '\uFFFD';
 | |
|                                 // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                             default:
 | |
|                                 /*
 | |
|                                  * Anything else Append the current input
 | |
|                                  * character to the current DOCTYPE token's
 | |
|                                  * public identifier.
 | |
|                                  */
 | |
|                                 appendStrBuf(c);
 | |
|                                 /*
 | |
|                                  * Stay in the DOCTYPE public identifier
 | |
|                                  * (single-quoted) state.
 | |
|                                  */
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                 case PROCESSING_INSTRUCTION:
 | |
|                     processinginstructionloop: for (;;) {
 | |
|                         if (++pos == endPos) {
 | |
|                             break stateloop;
 | |
|                         }
 | |
|                         c = checkChar(buf, pos);
 | |
|                         switch (c) {
 | |
|                             case '?':
 | |
|                                 state = transition(
 | |
|                                         state,
 | |
|                                         Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
 | |
|                                         reconsume, pos);
 | |
|                                 break processinginstructionloop;
 | |
|                             // continue stateloop;
 | |
|                             default:
 | |
|                                 continue;
 | |
|                         }
 | |
|                     }
 | |
|                     // CPPONLY: MOZ_FALLTHROUGH;
 | |
|                 case PROCESSING_INSTRUCTION_QUESTION_MARK:
 | |
|                     if (++pos == endPos) {
 | |
|                         break stateloop;
 | |
|                     }
 | |
|                     c = checkChar(buf, pos);
 | |
|                     switch (c) {
 | |
|                         case '>':
 | |
|                             state = transition(state, Tokenizer.DATA,
 | |
|                                     reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                         default:
 | |
|                             state = transition(state,
 | |
|                                     Tokenizer.PROCESSING_INSTRUCTION,
 | |
|                                     reconsume, pos);
 | |
|                             continue stateloop;
 | |
|                     }
 | |
|                     // END HOTSPOT WORKAROUND
 | |
|             }
 | |
|         }
 | |
|         flushChars(buf, pos);
 | |
|         /*
 | |
|          * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
 | |
|          */
 | |
|         // Save locals
 | |
|         stateSave = state;
 | |
|         returnStateSave = returnState;
 | |
|         return pos;
 | |
|     }
 | |
| 
 | |
|     // HOTSPOT WORKAROUND INSERTION POINT
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException {
 | |
|         return to;
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     private void initDoctypeFields() {
 | |
|         // Discard the characters "DOCTYPE" accumulated as a potential bogus
 | |
|         // comment into strBuf.
 | |
|         clearStrBufAfterUse();
 | |
|         doctypeName = "";
 | |
|         if (systemIdentifier != null) {
 | |
|             Portability.releaseString(systemIdentifier);
 | |
|             systemIdentifier = null;
 | |
|         }
 | |
|         if (publicIdentifier != null) {
 | |
|             Portability.releaseString(publicIdentifier);
 | |
|             publicIdentifier = null;
 | |
|         }
 | |
|         forceQuirks = false;
 | |
|     }
 | |
| 
 | |
|     @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
 | |
|             throws SAXException {
 | |
|         silentCarriageReturn();
 | |
|         adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
 | |
|     }
 | |
| 
 | |
|     @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
 | |
|             throws SAXException {
 | |
|         silentLineFeed();
 | |
|         adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
 | |
|     }
 | |
| 
 | |
|     @Inline private void appendStrBufLineFeed() {
 | |
|         silentLineFeed();
 | |
|         appendStrBuf('\n');
 | |
|     }
 | |
| 
 | |
|     @Inline private void appendStrBufCarriageReturn() {
 | |
|         silentCarriageReturn();
 | |
|         appendStrBuf('\n');
 | |
|     }
 | |
| 
 | |
|     @Inline protected void silentCarriageReturn() {
 | |
|         ++line;
 | |
|         lastCR = true;
 | |
|     }
 | |
| 
 | |
|     @Inline protected void silentLineFeed() {
 | |
|         ++line;
 | |
|     }
 | |
| 
 | |
|     private void emitCarriageReturn(@NoLength char[] buf, int pos)
 | |
|             throws SAXException {
 | |
|         silentCarriageReturn();
 | |
|         flushChars(buf, pos);
 | |
|         tokenHandler.characters(Tokenizer.LF, 0, 1);
 | |
|         cstart = Integer.MAX_VALUE;
 | |
|     }
 | |
| 
 | |
|     private void emitReplacementCharacter(@NoLength char[] buf, int pos)
 | |
|             throws SAXException {
 | |
|         flushChars(buf, pos);
 | |
|         tokenHandler.zeroOriginatingReplacementCharacter();
 | |
|         cstart = pos + 1;
 | |
|     }
 | |
| 
 | |
|     private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos)
 | |
|             throws SAXException {
 | |
|         flushChars(buf, pos);
 | |
|         tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1);
 | |
|         cstart = pos + 1;
 | |
|     }
 | |
| 
 | |
|     private void setAdditionalAndRememberAmpersandLocation(char add) {
 | |
|         additional = add;
 | |
|         // [NOCPP[
 | |
|         ampersandLocation = new LocatorImpl(this);
 | |
|         // ]NOCPP]
 | |
|     }
 | |
| 
 | |
|     private void bogusDoctype() throws SAXException {
 | |
|         errBogusDoctype();
 | |
|         forceQuirks = true;
 | |
|     }
 | |
| 
 | |
|     private void bogusDoctypeWithoutQuirks() throws SAXException {
 | |
|         errBogusDoctype();
 | |
|         forceQuirks = false;
 | |
|     }
 | |
| 
 | |
|     private void handleNcrValue(int returnState) throws SAXException {
 | |
|         /*
 | |
|          * If one or more characters match the range, then take them all and
 | |
|          * interpret the string of characters as a number (either hexadecimal or
 | |
|          * decimal as appropriate).
 | |
|          */
 | |
|         if (value <= 0xFFFF) {
 | |
|             if (value >= 0x80 && value <= 0x9f) {
 | |
|                 /*
 | |
|                  * If that number is one of the numbers in the first column of
 | |
|                  * the following table, then this is a parse error.
 | |
|                  */
 | |
|                 errNcrInC1Range();
 | |
|                 /*
 | |
|                  * Find the row with that number in the first column, and return
 | |
|                  * a character token for the Unicode character given in the
 | |
|                  * second column of that row.
 | |
|                  */
 | |
|                 @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80];
 | |
|                 emitOrAppendOne(val, returnState);
 | |
|                 // [NOCPP[
 | |
|             } else if (value == 0xC
 | |
|                     && contentSpacePolicy != XmlViolationPolicy.ALLOW) {
 | |
|                 if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
 | |
|                     emitOrAppendOne(Tokenizer.SPACE, returnState);
 | |
|                 } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) {
 | |
|                     fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
 | |
|                 }
 | |
|                 // ]NOCPP]
 | |
|             } else if (value == 0x0) {
 | |
|                 errNcrZero();
 | |
|                 emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
 | |
|             } else if ((value & 0xF800) == 0xD800) {
 | |
|                 errNcrSurrogate();
 | |
|                 emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
 | |
|             } else {
 | |
|                 /*
 | |
|                  * Otherwise, return a character token for the Unicode character
 | |
|                  * whose code point is that number.
 | |
|                  */
 | |
|                 char ch = (char) value;
 | |
|                 // [NOCPP[
 | |
|                 if (value == 0x0D) {
 | |
|                     errNcrCr();
 | |
|                 } else if ((value <= 0x0008) || (value == 0x000B)
 | |
|                         || (value >= 0x000E && value <= 0x001F)) {
 | |
|                     ch = errNcrControlChar(ch);
 | |
|                 } else if (value >= 0xFDD0 && value <= 0xFDEF) {
 | |
|                     errNcrUnassigned();
 | |
|                 } else if ((value & 0xFFFE) == 0xFFFE) {
 | |
|                     ch = errNcrNonCharacter(ch);
 | |
|                 } else if (value >= 0x007F && value <= 0x009F) {
 | |
|                     errNcrControlChar();
 | |
|                 } else {
 | |
|                     maybeWarnPrivateUse(ch);
 | |
|                 }
 | |
|                 // ]NOCPP]
 | |
|                 bmpChar[0] = ch;
 | |
|                 emitOrAppendOne(bmpChar, returnState);
 | |
|             }
 | |
|         } else if (value <= 0x10FFFF) {
 | |
|             // [NOCPP[
 | |
|             maybeWarnPrivateUseAstral();
 | |
|             if ((value & 0xFFFE) == 0xFFFE) {
 | |
|                 errAstralNonCharacter(value);
 | |
|             }
 | |
|             // ]NOCPP]
 | |
|             astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10));
 | |
|             astralChar[1] = (char) (0xDC00 + (value & 0x3FF));
 | |
|             emitOrAppendTwo(astralChar, returnState);
 | |
|         } else {
 | |
|             errNcrOutOfRange();
 | |
|             emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public void eof() throws SAXException {
 | |
|         int state = stateSave;
 | |
|         int returnState = returnStateSave;
 | |
| 
 | |
|         eofloop: for (;;) {
 | |
|             switch (state) {
 | |
|                 case SCRIPT_DATA_LESS_THAN_SIGN:
 | |
|                 case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
 | |
|                     /*
 | |
|                      * Otherwise, emit a U+003C LESS-THAN SIGN character token
 | |
|                      */
 | |
|                     tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                     /*
 | |
|                      * and reconsume the current input character in the data
 | |
|                      * state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case TAG_OPEN:
 | |
|                     /*
 | |
|                      * The behavior of this state depends on the content model
 | |
|                      * flag.
 | |
|                      */
 | |
|                     /*
 | |
|                      * Anything else Parse error.
 | |
|                      */
 | |
|                     errEofAfterLt();
 | |
|                     /*
 | |
|                      * Emit a U+003C LESS-THAN SIGN character token
 | |
|                      */
 | |
|                     tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                     /*
 | |
|                      * and reconsume the current input character in the data
 | |
|                      * state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case RAWTEXT_RCDATA_LESS_THAN_SIGN:
 | |
|                     /*
 | |
|                      * Emit a U+003C LESS-THAN SIGN character token
 | |
|                      */
 | |
|                     tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
 | |
|                     /*
 | |
|                      * and reconsume the current input character in the RCDATA
 | |
|                      * state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case NON_DATA_END_TAG_NAME:
 | |
|                     /*
 | |
|                      * Emit a U+003C LESS-THAN SIGN character token, a U+002F
 | |
|                      * SOLIDUS character token,
 | |
|                      */
 | |
|                     tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
 | |
|                     /*
 | |
|                      * a character token for each of the characters in the
 | |
|                      * temporary buffer (in the order they were added to the
 | |
|                      * buffer),
 | |
|                      */
 | |
|                     emitStrBuf();
 | |
|                     /*
 | |
|                      * and reconsume the current input character in the RCDATA
 | |
|                      * state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case CLOSE_TAG_OPEN:
 | |
|                     /* EOF Parse error. */
 | |
|                     errEofAfterLt();
 | |
|                     /*
 | |
|                      * Emit a U+003C LESS-THAN SIGN character token and a U+002F
 | |
|                      * SOLIDUS character token.
 | |
|                      */
 | |
|                     tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case TAG_NAME:
 | |
|                     /*
 | |
|                      * EOF Parse error.
 | |
|                      */
 | |
|                     errEofInTagName();
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case BEFORE_ATTRIBUTE_NAME:
 | |
|                 case AFTER_ATTRIBUTE_VALUE_QUOTED:
 | |
|                 case SELF_CLOSING_START_TAG:
 | |
|                     /* EOF Parse error. */
 | |
|                     errEofWithoutGt();
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case ATTRIBUTE_NAME:
 | |
|                     /*
 | |
|                      * EOF Parse error.
 | |
|                      */
 | |
|                     errEofInAttributeName();
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case AFTER_ATTRIBUTE_NAME:
 | |
|                 case BEFORE_ATTRIBUTE_VALUE:
 | |
|                     /* EOF Parse error. */
 | |
|                     errEofWithoutGt();
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
 | |
|                 case ATTRIBUTE_VALUE_SINGLE_QUOTED:
 | |
|                 case ATTRIBUTE_VALUE_UNQUOTED:
 | |
|                     /* EOF Parse error. */
 | |
|                     errEofInAttributeValue();
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case BOGUS_COMMENT:
 | |
|                     emitComment(0, 0);
 | |
|                     break eofloop;
 | |
|                 case BOGUS_COMMENT_HYPHEN:
 | |
|                     // [NOCPP[
 | |
|                     maybeAppendSpaceToBogusComment();
 | |
|                     // ]NOCPP]
 | |
|                     emitComment(0, 0);
 | |
|                     break eofloop;
 | |
|                 case MARKUP_DECLARATION_OPEN:
 | |
|                     errBogusComment();
 | |
|                     emitComment(0, 0);
 | |
|                     break eofloop;
 | |
|                 case MARKUP_DECLARATION_HYPHEN:
 | |
|                     errBogusComment();
 | |
|                     emitComment(0, 0);
 | |
|                     break eofloop;
 | |
|                 case MARKUP_DECLARATION_OCTYPE:
 | |
|                     if (index < 6) {
 | |
|                         errBogusComment();
 | |
|                         emitComment(0, 0);
 | |
|                     } else {
 | |
|                         /* EOF Parse error. */
 | |
|                         errEofInDoctype();
 | |
|                         /*
 | |
|                          * Create a new DOCTYPE token. Set its force-quirks flag
 | |
|                          * to on.
 | |
|                          */
 | |
|                         doctypeName = "";
 | |
|                         if (systemIdentifier != null) {
 | |
|                             Portability.releaseString(systemIdentifier);
 | |
|                             systemIdentifier = null;
 | |
|                         }
 | |
|                         if (publicIdentifier != null) {
 | |
|                             Portability.releaseString(publicIdentifier);
 | |
|                             publicIdentifier = null;
 | |
|                         }
 | |
|                         forceQuirks = true;
 | |
|                         /*
 | |
|                          * Emit the token.
 | |
|                          */
 | |
|                         emitDoctypeToken(0);
 | |
|                         /*
 | |
|                          * Reconsume the EOF character in the data state.
 | |
|                          */
 | |
|                         break eofloop;
 | |
|                     }
 | |
|                     break eofloop;
 | |
|                 case COMMENT_START:
 | |
|                 case COMMENT:
 | |
|                     /*
 | |
|                      * EOF Parse error.
 | |
|                      */
 | |
|                     errEofInComment();
 | |
|                     /* Emit the comment token. */
 | |
|                     emitComment(0, 0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case COMMENT_END:
 | |
|                     errEofInComment();
 | |
|                     /* Emit the comment token. */
 | |
|                     emitComment(2, 0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case COMMENT_END_DASH:
 | |
|                 case COMMENT_START_DASH:
 | |
|                     errEofInComment();
 | |
|                     /* Emit the comment token. */
 | |
|                     emitComment(1, 0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case COMMENT_END_BANG:
 | |
|                     errEofInComment();
 | |
|                     /* Emit the comment token. */
 | |
|                     emitComment(3, 0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case DOCTYPE:
 | |
|                 case BEFORE_DOCTYPE_NAME:
 | |
|                     errEofInDoctype();
 | |
|                     /*
 | |
|                      * Create a new DOCTYPE token. Set its force-quirks flag to
 | |
|                      * on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit the token.
 | |
|                      */
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case DOCTYPE_NAME:
 | |
|                     errEofInDoctype();
 | |
|                     strBufToDoctypeName();
 | |
|                     /*
 | |
|                      * Set the DOCTYPE token's force-quirks flag to on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case DOCTYPE_UBLIC:
 | |
|                 case DOCTYPE_YSTEM:
 | |
|                 case AFTER_DOCTYPE_NAME:
 | |
|                 case AFTER_DOCTYPE_PUBLIC_KEYWORD:
 | |
|                 case AFTER_DOCTYPE_SYSTEM_KEYWORD:
 | |
|                 case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
 | |
|                     errEofInDoctype();
 | |
|                     /*
 | |
|                      * Set the DOCTYPE token's force-quirks flag to on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
 | |
|                 case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
 | |
|                     /* EOF Parse error. */
 | |
|                     errEofInPublicId();
 | |
|                     /*
 | |
|                      * Set the DOCTYPE token's force-quirks flag to on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     publicIdentifier = strBufToString();
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
 | |
|                 case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
 | |
|                 case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
 | |
|                     errEofInDoctype();
 | |
|                     /*
 | |
|                      * Set the DOCTYPE token's force-quirks flag to on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
 | |
|                 case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
 | |
|                     /* EOF Parse error. */
 | |
|                     errEofInSystemId();
 | |
|                     /*
 | |
|                      * Set the DOCTYPE token's force-quirks flag to on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     systemIdentifier = strBufToString();
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
 | |
|                     errEofInDoctype();
 | |
|                     /*
 | |
|                      * Set the DOCTYPE token's force-quirks flag to on.
 | |
|                      */
 | |
|                     forceQuirks = true;
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case BOGUS_DOCTYPE:
 | |
|                     /*
 | |
|                      * Emit that DOCTYPE token.
 | |
|                      */
 | |
|                     emitDoctypeToken(0);
 | |
|                     /*
 | |
|                      * Reconsume the EOF character in the data state.
 | |
|                      */
 | |
|                     break eofloop;
 | |
|                 case CONSUME_CHARACTER_REFERENCE:
 | |
|                     /*
 | |
|                      * Unlike the definition is the spec, this state does not
 | |
|                      * return a value and never requires the caller to
 | |
|                      * backtrack. This state takes care of emitting characters
 | |
|                      * or appending to the current attribute value. It also
 | |
|                      * takes care of that in the case when consuming the entity
 | |
|                      * fails.
 | |
|                      */
 | |
|                     /*
 | |
|                      * This section defines how to consume an entity. This
 | |
|                      * definition is used when parsing entities in text and in
 | |
|                      * attributes.
 | |
|                      *
 | |
|                      * The behavior depends on the identity of the next
 | |
|                      * character (the one immediately after the U+0026 AMPERSAND
 | |
|                      * character):
 | |
|                      */
 | |
| 
 | |
|                     emitOrAppendCharRefBuf(returnState);
 | |
|                     state = returnState;
 | |
|                     continue;
 | |
|                 case CHARACTER_REFERENCE_HILO_LOOKUP:
 | |
|                     errNoNamedCharacterMatch();
 | |
|                     emitOrAppendCharRefBuf(returnState);
 | |
|                     state = returnState;
 | |
|                     continue;
 | |
|                 case CHARACTER_REFERENCE_TAIL:
 | |
|                     outer: for (;;) {
 | |
|                         char c = '\u0000';
 | |
|                         entCol++;
 | |
|                         /*
 | |
|                          * Consume the maximum number of characters possible,
 | |
|                          * with the consumed characters matching one of the
 | |
|                          * identifiers in the first column of the named
 | |
|                          * character references table (in a case-sensitive
 | |
|                          * manner).
 | |
|                          */
 | |
|                         hiloop: for (;;) {
 | |
|                             if (hi == -1) {
 | |
|                                 break hiloop;
 | |
|                             }
 | |
|                             if (entCol == NamedCharacters.NAMES[hi].length()) {
 | |
|                                 break hiloop;
 | |
|                             }
 | |
|                             if (entCol > NamedCharacters.NAMES[hi].length()) {
 | |
|                                 break outer;
 | |
|                             } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
 | |
|                                 hi--;
 | |
|                             } else {
 | |
|                                 break hiloop;
 | |
|                             }
 | |
|                         }
 | |
| 
 | |
|                         loloop: for (;;) {
 | |
|                             if (hi < lo) {
 | |
|                                 break outer;
 | |
|                             }
 | |
|                             if (entCol == NamedCharacters.NAMES[lo].length()) {
 | |
|                                 candidate = lo;
 | |
|                                 charRefBufMark = charRefBufLen;
 | |
|                                 lo++;
 | |
|                             } else if (entCol > NamedCharacters.NAMES[lo].length()) {
 | |
|                                 break outer;
 | |
|                             } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
 | |
|                                 lo++;
 | |
|                             } else {
 | |
|                                 break loloop;
 | |
|                             }
 | |
|                         }
 | |
|                         if (hi < lo) {
 | |
|                             break outer;
 | |
|                         }
 | |
|                         continue;
 | |
|                     }
 | |
| 
 | |
|                     if (candidate == -1) {
 | |
|                         /*
 | |
|                          * If no match can be made, then this is a parse error.
 | |
|                          */
 | |
|                         errNoNamedCharacterMatch();
 | |
|                         emitOrAppendCharRefBuf(returnState);
 | |
|                         state = returnState;
 | |
|                         continue eofloop;
 | |
|                     } else {
 | |
|                         @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
 | |
|                         if (candidateName.length() == 0
 | |
|                                 || candidateName.charAt(candidateName.length() - 1) != ';') {
 | |
|                             /*
 | |
|                              * If the last character matched is not a U+003B
 | |
|                              * SEMICOLON (;), there is a parse error.
 | |
|                              */
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|                                 /*
 | |
|                                  * If the entity is being consumed as part of an
 | |
|                                  * attribute, and the last character matched is
 | |
|                                  * not a U+003B SEMICOLON (;),
 | |
|                                  */
 | |
|                                 char ch;
 | |
|                                 if (charRefBufMark == charRefBufLen) {
 | |
|                                     ch = '\u0000';
 | |
|                                 } else {
 | |
|                                     ch = charRefBuf[charRefBufMark];
 | |
|                                 }
 | |
|                                 if ((ch >= '0' && ch <= '9')
 | |
|                                         || (ch >= 'A' && ch <= 'Z')
 | |
|                                         || (ch >= 'a' && ch <= 'z')) {
 | |
|                                     /*
 | |
|                                      * and the next character is in the range
 | |
|                                      * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
 | |
|                                      * U+0041 LATIN CAPITAL LETTER A to U+005A
 | |
|                                      * LATIN CAPITAL LETTER Z, or U+0061 LATIN
 | |
|                                      * SMALL LETTER A to U+007A LATIN SMALL
 | |
|                                      * LETTER Z, then, for historical reasons,
 | |
|                                      * all the characters that were matched
 | |
|                                      * after the U+0026 AMPERSAND (&) must be
 | |
|                                      * unconsumed, and nothing is returned.
 | |
|                                      */
 | |
|                                     errNoNamedCharacterMatch();
 | |
|                                     appendCharRefBufToStrBuf();
 | |
|                                     state = returnState;
 | |
|                                     continue eofloop;
 | |
|                                 }
 | |
|                             }
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|                                 errUnescapedAmpersandInterpretedAsCharacterReference();
 | |
|                             } else {
 | |
|                                 errNotSemicolonTerminated();
 | |
|                             }
 | |
|                         }
 | |
| 
 | |
|                         /*
 | |
|                          * Otherwise, return a character token for the character
 | |
|                          * corresponding to the entity name (as given by the
 | |
|                          * second column of the named character references
 | |
|                          * table).
 | |
|                          */
 | |
|                         @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
 | |
|                         if (
 | |
|                         // [NOCPP[
 | |
|                         val.length == 1
 | |
|                         // ]NOCPP]
 | |
|                         // CPPONLY: val[1] == 0
 | |
|                         ) {
 | |
|                             emitOrAppendOne(val, returnState);
 | |
|                         } else {
 | |
|                             emitOrAppendTwo(val, returnState);
 | |
|                         }
 | |
|                         // this is so complicated!
 | |
|                         if (charRefBufMark < charRefBufLen) {
 | |
|                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|                                 appendStrBuf(charRefBuf, charRefBufMark,
 | |
|                                         charRefBufLen - charRefBufMark);
 | |
|                             } else {
 | |
|                                 tokenHandler.characters(charRefBuf, charRefBufMark,
 | |
|                                         charRefBufLen - charRefBufMark);
 | |
|                             }
 | |
|                         }
 | |
|                         charRefBufLen = 0;
 | |
|                         state = returnState;
 | |
|                         continue eofloop;
 | |
|                         /*
 | |
|                          * If the markup contains I'm ¬it; I tell you, the
 | |
|                          * entity is parsed as "not", as in, I'm ¬it; I tell
 | |
|                          * you. But if the markup was I'm ∉ I tell you,
 | |
|                          * the entity would be parsed as "notin;", resulting in
 | |
|                          * I'm ∉ I tell you.
 | |
|                          */
 | |
|                     }
 | |
|                 case CONSUME_NCR:
 | |
|                 case DECIMAL_NRC_LOOP:
 | |
|                 case HEX_NCR_LOOP:
 | |
|                     /*
 | |
|                      * If no characters match the range, then don't consume any
 | |
|                      * characters (and unconsume the U+0023 NUMBER SIGN
 | |
|                      * character and, if appropriate, the X character). This is
 | |
|                      * a parse error; nothing is returned.
 | |
|                      *
 | |
|                      * Otherwise, if the next character is a U+003B SEMICOLON,
 | |
|                      * consume that too. If it isn't, there is a parse error.
 | |
|                      */
 | |
|                     if (!seenDigits) {
 | |
|                         errNoDigitsInNCR();
 | |
|                         emitOrAppendCharRefBuf(returnState);
 | |
|                         state = returnState;
 | |
|                         continue;
 | |
|                     } else {
 | |
|                         errCharRefLacksSemicolon();
 | |
|                     }
 | |
|                     // WARNING previous state sets reconsume
 | |
|                     handleNcrValue(returnState);
 | |
|                     state = returnState;
 | |
|                     continue;
 | |
|                 case CDATA_RSQB:
 | |
|                     tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
 | |
|                     break eofloop;
 | |
|                 case CDATA_RSQB_RSQB:
 | |
|                     tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
 | |
|                     break eofloop;
 | |
|                 case DATA:
 | |
|                 default:
 | |
|                     break eofloop;
 | |
|             }
 | |
|         }
 | |
|         // case DATA:
 | |
|         /*
 | |
|          * EOF Emit an end-of-file token.
 | |
|          */
 | |
|         tokenHandler.eof();
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     private void emitDoctypeToken(int pos) throws SAXException {
 | |
|         cstart = pos + 1;
 | |
|         tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
 | |
|                 forceQuirks);
 | |
|         // It is OK and sufficient to release these here, since
 | |
|         // there's no way out of the doctype states than through paths
 | |
|         // that call this method.
 | |
|         doctypeName = null;
 | |
|         Portability.releaseString(publicIdentifier);
 | |
|         publicIdentifier = null;
 | |
|         Portability.releaseString(systemIdentifier);
 | |
|         systemIdentifier = null;
 | |
|     }
 | |
| 
 | |
|     @Inline protected char checkChar(@NoLength char[] buf, int pos)
 | |
|             throws SAXException {
 | |
|         return buf[pos];
 | |
|     }
 | |
| 
 | |
|     public boolean internalEncodingDeclaration(String internalCharset)
 | |
|             throws SAXException {
 | |
|         if (encodingDeclarationHandler != null) {
 | |
|             return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset);
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * @param val
 | |
|      * @throws SAXException
 | |
|      */
 | |
|     private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
 | |
|             throws SAXException {
 | |
|         if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|             appendStrBuf(val[0]);
 | |
|             appendStrBuf(val[1]);
 | |
|         } else {
 | |
|             tokenHandler.characters(val, 0, 2);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
 | |
|             throws SAXException {
 | |
|         if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
 | |
|             appendStrBuf(val[0]);
 | |
|         } else {
 | |
|             tokenHandler.characters(val, 0, 1);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public void end() throws SAXException {
 | |
|         strBuf = null;
 | |
|         doctypeName = null;
 | |
|         if (systemIdentifier != null) {
 | |
|             Portability.releaseString(systemIdentifier);
 | |
|             systemIdentifier = null;
 | |
|         }
 | |
|         if (publicIdentifier != null) {
 | |
|             Portability.releaseString(publicIdentifier);
 | |
|             publicIdentifier = null;
 | |
|         }
 | |
|         tagName = null;
 | |
|         nonInternedTagName.setNameForNonInterned(null
 | |
|                 // CPPONLY: , false
 | |
|                 );
 | |
|         attributeName = null;
 | |
|         // CPPONLY: nonInternedAttributeName.setNameForNonInterned(null);
 | |
|         tokenHandler.endTokenization();
 | |
|         if (attributes != null) {
 | |
|             // [NOCPP[
 | |
|             attributes = null;
 | |
|             // ]NOCPP]
 | |
|             // CPPONLY: attributes.clear(mappingLangToXmlLang);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public void requestSuspension() {
 | |
|         shouldSuspend = true;
 | |
|     }
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     public void becomeConfident() {
 | |
|         confident = true;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Returns the nextCharOnNewLine.
 | |
|      *
 | |
|      * @return the nextCharOnNewLine
 | |
|      */
 | |
|     public boolean isNextCharOnNewLine() {
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     public boolean isPrevCR() {
 | |
|         return lastCR;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Returns the line.
 | |
|      *
 | |
|      * @return the line
 | |
|      */
 | |
|     public int getLine() {
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Returns the col.
 | |
|      *
 | |
|      * @return the col
 | |
|      */
 | |
|     public int getCol() {
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
|     public boolean isInDataState() {
 | |
|         return (stateSave == DATA);
 | |
|     }
 | |
| 
 | |
|     public void resetToDataState() {
 | |
|         clearStrBufAfterUse();
 | |
|         charRefBufLen = 0;
 | |
|         stateSave = Tokenizer.DATA;
 | |
|         // line = 1; XXX line numbers
 | |
|         lastCR = false;
 | |
|         index = 0;
 | |
|         forceQuirks = false;
 | |
|         additional = '\u0000';
 | |
|         entCol = -1;
 | |
|         firstCharKey = -1;
 | |
|         lo = 0;
 | |
|         hi = 0; // will always be overwritten before use anyway
 | |
|         candidate = -1;
 | |
|         charRefBufMark = 0;
 | |
|         value = 0;
 | |
|         seenDigits = false;
 | |
|         endTag = false;
 | |
|         shouldSuspend = false;
 | |
|         initDoctypeFields();
 | |
|         containsHyphen = false;
 | |
|         tagName = null;
 | |
|         attributeName = null;
 | |
|         if (newAttributesEachTime) {
 | |
|             if (attributes != null) {
 | |
|                 Portability.delete(attributes);
 | |
|                 attributes = null;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public void loadState(Tokenizer other) throws SAXException {
 | |
|         strBufLen = other.strBufLen;
 | |
|         if (strBufLen > strBuf.length) {
 | |
|             strBuf = new char[strBufLen];
 | |
|         }
 | |
|         System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
 | |
| 
 | |
|         charRefBufLen = other.charRefBufLen;
 | |
|         System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen);
 | |
| 
 | |
|         stateSave = other.stateSave;
 | |
|         returnStateSave = other.returnStateSave;
 | |
|         endTagExpectation = other.endTagExpectation;
 | |
|         endTagExpectationAsArray = other.endTagExpectationAsArray;
 | |
|         // line = 1; XXX line numbers
 | |
|         lastCR = other.lastCR;
 | |
|         index = other.index;
 | |
|         forceQuirks = other.forceQuirks;
 | |
|         additional = other.additional;
 | |
|         entCol = other.entCol;
 | |
|         firstCharKey = other.firstCharKey;
 | |
|         lo = other.lo;
 | |
|         hi = other.hi;
 | |
|         candidate = other.candidate;
 | |
|         charRefBufMark = other.charRefBufMark;
 | |
|         value = other.value;
 | |
|         seenDigits = other.seenDigits;
 | |
|         endTag = other.endTag;
 | |
|         shouldSuspend = false;
 | |
| 
 | |
|         if (other.doctypeName == null) {
 | |
|             doctypeName = null;
 | |
|         } else {
 | |
|             doctypeName = Portability.newLocalFromLocal(other.doctypeName,
 | |
|                     interner);
 | |
|         }
 | |
| 
 | |
|         Portability.releaseString(systemIdentifier);
 | |
|         if (other.systemIdentifier == null) {
 | |
|             systemIdentifier = null;
 | |
|         } else {
 | |
|             systemIdentifier = Portability.newStringFromString(other.systemIdentifier);
 | |
|         }
 | |
| 
 | |
|         Portability.releaseString(publicIdentifier);
 | |
|         if (other.publicIdentifier == null) {
 | |
|             publicIdentifier = null;
 | |
|         } else {
 | |
|             publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
 | |
|         }
 | |
| 
 | |
|         containsHyphen = other.containsHyphen;
 | |
|         if (other.tagName == null) {
 | |
|             tagName = null;
 | |
|         } else if (other.tagName.isInterned()) {
 | |
|             tagName = other.tagName;
 | |
|         } else {
 | |
|             // In the C++ case, the atoms in the other tokenizer are from a
 | |
|             // different tokenizer-scoped atom table. Therefore, we have to
 | |
|             // obtain the correspoding atom from our own atom table.
 | |
|             nonInternedTagName.setNameForNonInterned(Portability.newLocalFromLocal(other.tagName.getName(), interner)
 | |
|                     // CPPONLY: , other.tagName.isCustom()
 | |
|                     );
 | |
|             tagName = nonInternedTagName;
 | |
|         }
 | |
| 
 | |
|         // [NOCPP[
 | |
|         attributeName = other.attributeName;
 | |
|         // ]NOCPP]
 | |
|         // CPPONLY: if (other.attributeName == null) {
 | |
|         // CPPONLY:     attributeName = null;
 | |
|         // CPPONLY: } else if (other.attributeName.isInterned()) {
 | |
|         // CPPONLY:     attributeName = other.attributeName;
 | |
|         // CPPONLY: } else {
 | |
|         // CPPONLY:     // In the C++ case, the atoms in the other tokenizer are from a
 | |
|         // CPPONLY:     // different tokenizer-scoped atom table. Therefore, we have to
 | |
|         // CPPONLY:     // obtain the correspoding atom from our own atom table.
 | |
|         // CPPONLY:     nonInternedAttributeName.setNameForNonInterned(Portability.newLocalFromLocal(other.attributeName.getLocal(AttributeName.HTML), interner));
 | |
|         // CPPONLY:     attributeName = nonInternedAttributeName;
 | |
|         // CPPONLY: }
 | |
| 
 | |
|         Portability.delete(attributes);
 | |
|         if (other.attributes == null) {
 | |
|             attributes = null;
 | |
|         } else {
 | |
|             attributes = other.attributes.cloneAttributes(interner);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public void initializeWithoutStarting() throws SAXException {
 | |
|         confident = false;
 | |
|         strBuf = null;
 | |
|         line = 1;
 | |
|         // CPPONLY: attributeLine = 1;
 | |
|         // [NOCPP[
 | |
|         html4 = false;
 | |
|         metaBoundaryPassed = false;
 | |
|         wantsComments = tokenHandler.wantsComments();
 | |
|         if (!newAttributesEachTime) {
 | |
|             attributes = new HtmlAttributes(mappingLangToXmlLang);
 | |
|         }
 | |
|         // ]NOCPP]
 | |
|         resetToDataState();
 | |
|     }
 | |
| 
 | |
|     protected void errGarbageAfterLtSlash() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errLtSlashGt() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errWarnLtSlashInRcdata() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errHtml4LtSlashInRcdata(char folded) throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errCharRefLacksSemicolon() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNoDigitsInNCR() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errGtInSystemId() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errGtInPublicId() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNamelessDoctype() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errConsecutiveHyphens() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errPrematureEndOfComment() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errBogusComment() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errSlashNotFollowedByGt() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errHtml4XmlVoidSyntax() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNoSpaceBetweenAttributes() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errHtml4NonNameInUnquotedAttribute(char c)
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errAttributeValueMissing() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errBadCharBeforeAttributeNameOrNull(char c)
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEqualsSignBeforeAttributeName() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errBadCharAfterLt(char c) throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errLtGt() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errProcessingInstruction() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errUnescapedAmpersandInterpretedAsCharacterReference()
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNotSemicolonTerminated() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNoNamedCharacterMatch() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errQuoteBeforeAttributeName(char c) throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errQuoteOrLtInAttributeNameOrNull(char c)
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errExpectedPublicId() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errBogusDoctype() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void maybeWarnPrivateUseAstral() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void maybeWarnPrivateUse(char ch) throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void maybeErrSlashInEndTag(boolean selfClosing)
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected char errNcrNonCharacter(char ch) throws SAXException {
 | |
|         return ch;
 | |
|     }
 | |
| 
 | |
|     protected void errAstralNonCharacter(int ch) throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNcrSurrogate() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected char errNcrControlChar(char ch) throws SAXException {
 | |
|         return ch;
 | |
|     }
 | |
| 
 | |
|     protected void errNcrCr() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNcrInC1Range() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInPublicId() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInComment() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInDoctype() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInAttributeValue() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInAttributeName() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofWithoutGt() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInTagName() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInEndTag() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofAfterLt() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNcrOutOfRange() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNcrUnassigned() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errDuplicateAttribute() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errEofInSystemId() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errExpectedSystemId() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errHyphenHyphenBang() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNcrControlChar() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNcrZero() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
 | |
|             throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void noteAttributeWithoutValue() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     protected void noteUnquotedAttributeValue() throws SAXException {
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Sets the encodingDeclarationHandler.
 | |
|      *
 | |
|      * @param encodingDeclarationHandler
 | |
|      *            the encodingDeclarationHandler to set
 | |
|      */
 | |
|     public void setEncodingDeclarationHandler(
 | |
|             EncodingDeclarationHandler encodingDeclarationHandler) {
 | |
|         this.encodingDeclarationHandler = encodingDeclarationHandler;
 | |
|     }
 | |
| 
 | |
|     void destructor() {
 | |
|         Portability.delete(nonInternedTagName);
 | |
|         // CPPONLY: Portability.delete(nonInternedAttributeName);
 | |
|         nonInternedTagName = null;
 | |
|         // The translator will write refcount tracing stuff here
 | |
|         Portability.delete(attributes);
 | |
|         attributes = null;
 | |
|     }
 | |
| 
 | |
|     // [NOCPP[
 | |
| 
 | |
|     /**
 | |
|      * Sets an offset to be added to the position reported to
 | |
|      * <code>TransitionHandler</code>.
 | |
|      *
 | |
|      * @param offset the offset
 | |
|      */
 | |
|     public void setTransitionBaseOffset(int offset) {
 | |
| 
 | |
|     }
 | |
| 
 | |
|     // ]NOCPP]
 | |
| 
 | |
| }
 |