forked from mirrors/gecko-dev
Differential Revision: https://phabricator.services.mozilla.com/D69886 --HG-- rename : third_party/rust/jsparagus-emitter/src/scope_pass.rs => third_party/rust/jsparagus-scope/src/context.rs rename : third_party/rust/jsparagus-emitter/src/scope.rs => third_party/rust/jsparagus-scope/src/data.rs rename : third_party/rust/jsparagus-emitter/src/frame_slot.rs => third_party/rust/jsparagus-scope/src/frame_slot.rs extra : moz-landing-system : lando
129 lines
4.3 KiB
Python
129 lines
4.3 KiB
Python
""" Functions for loading the ECMAScript lexical and syntactic grammars. """
|
|
|
|
from jsparagus.ordered import OrderedSet, OrderedFrozenSet
|
|
from jsparagus import gen, grammar
|
|
from .lexer import ECMASCRIPT_FULL_KEYWORDS, ECMASCRIPT_CONDITIONAL_KEYWORDS
|
|
from .parse_esgrammar import parse_esgrammar
|
|
|
|
|
|
ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS: grammar.SyntheticTerminalsDict = {
    # In principle `SourceCharacter` is the set of every Unicode character.
    # Enumerating them would cost a lot of memory and, in practice, the set
    # is never consulted, so it is deliberately left empty.
    'SourceCharacter': OrderedFrozenSet([]),
}
|
|
|
|
# Goal nonterminals handed to `parse_esgrammar` by `load_lexical_grammar`.
ECMASCRIPT_LEXICAL_GOAL_NTS = [
    'WhiteSpace',
    'InputElementDiv',
    'InputElementRegExp',
]
|
|
|
|
|
|
def load_lexical_grammar(filename):
    """Read and parse the ECMAScript lexical grammar stored in `filename`.

    The text of the file is handed to `parse_esgrammar` together with the
    lexical goal nonterminals and the lexical synthetic terminals declared in
    this module. The parsed grammar is then passed through
    `gen.expand_parameterized_nonterminals`, and that result is returned.
    """
    with open(filename) as grammar_file:
        source_text = grammar_file.read()

    synthetic = ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS
    parsed = parse_esgrammar(
        source_text,
        filename=filename,
        goals=ECMASCRIPT_LEXICAL_GOAL_NTS,
        synthetic_terminals=synthetic,
        # The synthetic terminal names double as the terminal names.
        terminal_names=synthetic.keys(),
    )
    return gen.expand_parameterized_nonterminals(parsed)
|
|
|
|
|
|
# Goal nonterminals handed to `parse_esgrammar` by `load_syntactic_grammar`.
ECMASCRIPT_SYNTACTIC_GOAL_NTS = [
    'Script',
    'Module',
    # Not enabled as goals at present:
    # 'FormalParameters',
    # 'FunctionBody',
]
|
|
|
|
# Identifiers need special care. A "synthetic terminal" is a shorthand symbol
# standing for any one member of a set of terminals; *IdentifierName*, for
# instance, matches every token shaped like an identifier, keywords included.
#
# The members listed here must be the terminal names the lexer produces.
# With the exception of `Name`, the lexer emits the terminal symbols of the
# syntactic grammar, some of which are nonterminals of the lexical grammar:
# the syntactic grammar has `BooleanLiteral` rather than `true`/`false`, and
# `NullLiteral` rather than `null`.
ECMASCRIPT_SYNTHETIC_TERMINALS = {
    # Every identifier-shaped token. The raw `true`, `false` and `null`
    # names are removed from the set because `BooleanLiteral` and
    # `NullLiteral` stand in for them.
    'IdentifierName': OrderedSet([
        'Name',
        'BooleanLiteral',
        'NullLiteral',
        'NameWithEscape',
        *ECMASCRIPT_FULL_KEYWORDS,
        *ECMASCRIPT_CONDITIONAL_KEYWORDS
    ]) - OrderedSet(['true', 'false', 'null']),
    # Plain names plus the conditional keywords; full keywords are excluded.
    'Identifier': OrderedSet([
        'Name',
        'NameWithEscape',
        *ECMASCRIPT_CONDITIONAL_KEYWORDS
    ]),
}
|
|
|
|
# Nonterminals of the lexical grammar that the syntactic grammar treats as
# terminals.
ECMASCRIPT_TOKEN_NAMES = [
    'BooleanLiteral',
    'IdentifierName',
    'PrivateIdentifier',
    'NoSubstitutionTemplate',
    'NullLiteral',
    'NumericLiteral',
    'BigIntLiteral',
    'RegularExpressionLiteral',
    'StringLiteral',
    'TemplateHead',
    'TemplateMiddle',
    'TemplateTail',
]
|
|
|
|
# Every terminal, keywords aside, that our (hand-coded) lexer emits.
#
# (The lexer's handling of IdentifierName and related terminals deviates
# slightly from the spec; see `ECMASCRIPT_SYNTHETIC_TERMINALS`.)
TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR = [
    *ECMASCRIPT_TOKEN_NAMES,
    'Identifier',
    'Name',
]
|
|
|
|
|
|
def _extract_grammar_extensions(ext_filename):
    """Return the `grammar_extension!` blocks found in `ext_filename`.

    A block opens on a line that starts with `grammar_extension!` (that line
    must end with `{`) and closes on the next line that starts with `}`.

    Returns a list of `(ext_filename, start_line, content)` tuples, where
    `content` is the text strictly between the opening and closing lines and
    `start_line` is the 1-based line number of the first line of `content`.
    Blocks with an empty body are skipped, and a block still open at the end
    of the file is discarded.
    """
    blocks = []
    body_lines = None  # None while outside a block, a list while inside one.
    start_line = 0
    with open(ext_filename) as ext_file:
        # Count from 1 so that `lineno` is the conventional line number.
        for lineno, line in enumerate(ext_file, start=1):
            if line.startswith("grammar_extension!"):
                assert line.endswith("{\n"), (
                    f"{ext_filename}:{lineno}: the grammar_extension! line "
                    "must end with '{'")
                body_lines = []
                # The content starts on the line after the macro header.
                start_line = lineno + 1
                continue
            if body_lines is None:
                continue
            if line.startswith("}"):
                # Close the block whenever one is open, even if it is empty.
                # Testing the truthiness of the collected text here would
                # leave an empty block open and swallow the closing brace and
                # everything after it as grammar content.
                if body_lines:
                    blocks.append(
                        (ext_filename, start_line, "".join(body_lines)))
                body_lines = None
                continue
            body_lines.append(line)
    return blocks


def load_syntactic_grammar(filename, extensions):
    """Load the ECMAScript syntactic grammar.

    `filename` is the path of the grammar file. `extensions` is an iterable
    of paths to files that may contain `grammar_extension!` blocks; the body
    of each block is extracted and passed to `parse_esgrammar` along with the
    file it came from and the line number where it starts.

    Returns the grammar produced by `parse_esgrammar`.
    """
    with open(filename) as f:
        grammar_text = f.read()

    extensions_content = []
    for ext_filename in extensions:
        extensions_content.extend(_extract_grammar_extensions(ext_filename))

    return parse_esgrammar(
        grammar_text,
        filename=filename,
        extensions=extensions_content,
        goals=ECMASCRIPT_SYNTACTIC_GOAL_NTS,
        synthetic_terminals=ECMASCRIPT_SYNTHETIC_TERMINALS,
        terminal_names=TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR)
|