forked from mirrors/gecko-dev
		
	 be31dc3ce1
			
		
	
	
		be31dc3ce1
		
	
	
	
	
		
			
			Differential Revision: https://phabricator.services.mozilla.com/D69886 --HG-- rename : third_party/rust/jsparagus-emitter/src/scope_pass.rs => third_party/rust/jsparagus-scope/src/context.rs rename : third_party/rust/jsparagus-emitter/src/scope.rs => third_party/rust/jsparagus-scope/src/data.rs rename : third_party/rust/jsparagus-emitter/src/frame_slot.rs => third_party/rust/jsparagus-scope/src/frame_slot.rs extra : moz-landing-system : lando
		
			
				
	
	
		
			129 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			129 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """ Functions for loading the ECMAScript lexical and syntactic grammars. """
 | |
| 
 | |
| from jsparagus.ordered import OrderedSet, OrderedFrozenSet
 | |
| from jsparagus import gen, grammar
 | |
| from .lexer import ECMASCRIPT_FULL_KEYWORDS, ECMASCRIPT_CONDITIONAL_KEYWORDS
 | |
| from .parse_esgrammar import parse_esgrammar
 | |
| 
 | |
| 
 | |
# Synthetic terminals of the lexical grammar, keyed by terminal name.
ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS: grammar.SyntheticTerminalsDict = {
    # In theory `SourceCharacter` stands for every Unicode character.
    # Listing them all would take a lot of memory and, in practice, the set
    # is never consulted, so it is deliberately left empty.
    'SourceCharacter': OrderedFrozenSet([]),
}
 | |
| 
 | |
# Goal nonterminals passed to `parse_esgrammar` when loading the lexical
# grammar (see `load_lexical_grammar`).
ECMASCRIPT_LEXICAL_GOAL_NTS = [
    'WhiteSpace',
    'InputElementDiv',
    'InputElementRegExp',
]
 | |
| 
 | |
| 
 | |
def load_lexical_grammar(filename):
    """Load the ECMAScript lexical grammar.

    Reads the grammar text from `filename`, parses it with `parse_esgrammar`
    using `ECMASCRIPT_LEXICAL_GOAL_NTS` as goals and
    `ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS` as both the synthetic terminals
    and the source of terminal names, and returns the parsed grammar after
    `gen.expand_parameterized_nonterminals` has been applied to it.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the grammar text reads the same on every system.
    with open(filename, encoding="utf-8") as f:
        grammar_text = f.read()

    g = parse_esgrammar(
        grammar_text,
        filename=filename,
        goals=ECMASCRIPT_LEXICAL_GOAL_NTS,
        synthetic_terminals=ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS,
        terminal_names=ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS.keys())
    return gen.expand_parameterized_nonterminals(g)
 | |
| 
 | |
| 
 | |
# Goal nonterminals passed to `parse_esgrammar` when loading the syntactic
# grammar (see `load_syntactic_grammar`).
ECMASCRIPT_SYNTACTIC_GOAL_NTS = [
    'Script',
    'Module',
    # The following goals are currently disabled:
    # 'FormalParameters',
    # 'FunctionBody',
]
 | |
| 
 | |
# Identifiers are complicated. A "synthetic terminal" is a shorthand symbol
# standing for any one terminal out of a set. *IdentifierName*, for instance,
# matches every token that looks like an identifier, keywords included.
#
# The members of these sets must be the terminal names the lexer actually
# produces. Apart from `Name`, the lexer's output uses the terminal symbols of
# the syntactic grammar, some of which are nonterminals of the lexical
# grammar: the syntactic grammar says `BooleanLiteral` rather than `true` and
# `false`, and `NullLiteral` rather than `null`.
ECMASCRIPT_SYNTHETIC_TERMINALS = {
    'IdentifierName': OrderedSet([
        'Name',
        'BooleanLiteral',
        'NullLiteral',
        'NameWithEscape',
        *ECMASCRIPT_FULL_KEYWORDS,
        *ECMASCRIPT_CONDITIONAL_KEYWORDS
        # `true`, `false` and `null` are removed by name; they are represented
        # by `BooleanLiteral` and `NullLiteral` above.
    ]) - OrderedSet(['true', 'false', 'null']),
    'Identifier': OrderedSet([
        'Name',
        'NameWithEscape',
        *ECMASCRIPT_CONDITIONAL_KEYWORDS
    ]),
}
 | |
| 
 | |
# Nonterminals of the lexical grammar that appear as terminals in the
# syntactic grammar.
ECMASCRIPT_TOKEN_NAMES = [
    'BooleanLiteral',
    'IdentifierName',
    'PrivateIdentifier',
    'NoSubstitutionTemplate',
    'NullLiteral',
    'NumericLiteral',
    'BigIntLiteral',
    'RegularExpressionLiteral',
    'StringLiteral',
    'TemplateHead',
    'TemplateMiddle',
    'TemplateTail',
]
 | |
| 
 | |
# Every terminal, keywords aside, that our (hand-coded) lexer produces.
#
# (The lexer's treatment of IdentifierName and friends deviates slightly from
# the spec; see `ECMASCRIPT_SYNTHETIC_TERMINALS` above.)
TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR = [
    *ECMASCRIPT_TOKEN_NAMES,
    'Identifier',
    'Name',
]
 | |
| 
 | |
| 
 | |
def _extract_grammar_extensions(ext_filename):
    """Collect the bodies of the `grammar_extension!` blocks in one file.

    A block starts on a line beginning with `grammar_extension!` (which must
    end with `{`) and ends on the next line beginning with `}`. The lines in
    between form the block's content.

    Returns a list of `(ext_filename, start_line, content)` tuples, where
    `start_line` is the 1-based line number of the first content line.
    Blocks with no content lines are skipped, and a block that is still open
    at the end of the file is discarded.
    """
    found = []
    # `None` while outside a block; the list of content lines while inside.
    body = None
    start_line = 0
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding.
    with open(ext_filename, encoding="utf-8") as ext_file:
        for lineno, line in enumerate(ext_file, start=1):
            if line.startswith("grammar_extension!"):
                assert line.endswith("{\n"), (
                    "{}:{}: grammar_extension! line must end with '{{'"
                    .format(ext_filename, lineno))
                body = []
                # The content begins on the line after the macro header.
                start_line = lineno + 1
                continue
            if body is None:
                # Outside any block: nothing to collect.
                continue
            if line.startswith("}"):
                # Close the block even when it is empty. Testing the content
                # for truthiness here would leave an empty block open and
                # swallow the closing brace and everything after it.
                if body:
                    found.append((ext_filename, start_line, "".join(body)))
                body = None
                continue
            body.append(line)
    return found


def load_syntactic_grammar(filename, extensions):
    """Load the ECMAScript syntactic grammar.

    Reads the grammar text from `filename`, then scans every file named in
    `extensions` for `grammar_extension!` blocks. The grammar text and the
    collected `(filename, start_line, content)` extension tuples are handed
    to `parse_esgrammar` together with the syntactic goals, synthetic
    terminals and terminal names defined in this module.

    Returns the grammar produced by `parse_esgrammar`.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the grammar text reads the same on every system.
    with open(filename, encoding="utf-8") as f:
        grammar_text = f.read()

    extensions_content = []
    for ext_filename in extensions:
        extensions_content.extend(_extract_grammar_extensions(ext_filename))

    return parse_esgrammar(
        grammar_text,
        filename=filename,
        extensions=extensions_content,
        goals=ECMASCRIPT_SYNTACTIC_GOAL_NTS,
        synthetic_terminals=ECMASCRIPT_SYNTHETIC_TERMINALS,
        terminal_names=TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR)
 |