"""extract_es_grammar.py - Extract the grammar from the ECMAScript spec
|
|
|
|
To run this script, you first need to get the source of the version of
|
|
the ECMAScript spec you're interested in.
|
|
|
|
cd ../..
|
|
mkdir tc39
|
|
cd tc39
|
|
git clone git@github.com:tc39/ecma262.git
|
|
|
|
Then:
|
|
|
|
make js_parser/es.esgrammar
|
|
|
|
You can also use this script on a random HTTPS URL, like:
|
|
|
|
URL=https://raw.githubusercontent.com/tc39/proposal-class-fields/master/spec.html
|
|
python extract_esgrammar.py $URL
|
|
|
|
"""
|
|
|
|
import argparse
import urllib.request
import html5lib  # type: ignore
import re
from textwrap import dedent


HTML = "{http://www.w3.org/1999/xhtml}"
INS_TAG = HTML + "ins"
DEL_TAG = HTML + "del"

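# Diff-style markers used to tag each extracted text chunk as inserted
# (<ins>), deleted (<del>), or kept as-is.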
INS = '+'
DEL = '-'
KEEP = ' '


def pre_with_code_filter_factory(e):
    """Checks if the <pre> is used in the following pattern:

    ```
    <pre><code>
    </code></pre>
    ```

    If so, return a filter that formats the content, removing the extra
    spaces, line-wraps, and backquotes added by <code>.
    Otherwise, return False.
    """
    if e.text and e.text.strip() != '':
        return False

    if len(e) != 1:
        return False

    if e[0].tag != '{http://www.w3.org/1999/xhtml}code':
        return False

    if e[0].tail and e[0].tail.strip() != '':
        return False

    def children_filter(texts):
        # Drop leading blank chunks and the opening backquote added by
        # the <code> rule.
        while len(texts) > 0 and texts[0].strip() == '':
            texts.pop(0)
        if len(texts) > 0 and texts[0].strip() == '`':
            texts.pop(0)
        while len(texts) > 0 and texts[0].strip() == '':
            texts.pop(0)

        # Likewise for trailing blank chunks and the closing backquote.
        while len(texts) > 0 and texts[-1].strip() == '':
            texts.pop()
        if len(texts) > 0 and texts[-1].strip() == '`':
            texts.pop()
        while len(texts) > 0 and texts[-1].strip() == '':
            texts.pop()

        # Re-join the remaining lines, stripped of indentation, with a
        # single newline between non-empty lines.
        is_first = True
        for text in texts:
            for line in text.split('\n'):
                line = line.strip()
                if line == '':
                    continue

                if not is_first:
                    yield '\n'
                is_first = False

                yield line

    return children_filter
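
# Illustration (hypothetical input): the 'code' rule in EXTRA_RULES_FOR_EE
# below wraps its content in backquotes, so for <pre><code>foo</code></pre>
# the filter receives chunks like ['`', 'foo', '`'] and yields just 'foo',
# with the backquotes and any blank chunks removed.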


# Rules for extracting text, used when extracting Early Errors.
EXTRA_RULES_FOR_EE = {
    'b': {},
    'br': {},
    'code': {
        'prefix': '`',
        'postfix': '`',
    },
    'emu-alg': {},
    'emu-grammar': {},
    'emu-note': {
        'prefix': ['NOTE', '\n'],
        'strip': True,
    },
    'emu-xref': {
        'prefix': lambda e: e.attrib.get('href'),
    },
    'ins': {
        'ignore_highlighted': True,
    },
    'p': {
        'strip': True,
    },
    'pre': {
        'prefix': ['\n', '\n', '```', '\n'],
        'postfix': ['\n', '```', '\n', '\n'],
        'strip': True,
        'children_filter_factory': pre_with_code_filter_factory,
    },
    'sub': {
        'prefix': '_',
    },
    'sup': {
        'prefix': '^',
    },
}
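
# For example, under these rules a hypothetical fragment such as
#   <p>It is a Syntax Error if <code>arguments</code> occurs.</p>
# is extracted roughly as:
#   It is a Syntax Error if `arguments` occurs.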


def apply_prefix_postfix_rule(e, rule, name):
    """If `rule` is provided, yield the prefix/postfix text (selected by
    `name`) for the element `e`.
    """
    if not rule:
        return

    fix = rule.get(name)
    if callable(fix):
        yield fix(e)
    elif isinstance(fix, list):
        for item in fix:
            yield item
    elif fix:
        yield fix


def apply_strip_rule(text, rule):
    """If `rule` is provided, apply its strip rule to `text`, yielding the
    resulting text.
    """
    if not text:
        return

    if not rule:
        yield text
        return

    strip = rule.get('strip')
    if strip:
        yield text.strip()
    else:
        yield text


def fragment_child_chunks(e, extra_rules={}):
    """Partly interpret the content of `e`, yielding `text` chunks,
    applying `extra_rules`.

    Concatenating the yielded `text` values gives the full text of `e`.
    """
    rule = extra_rules[e.tag.replace(HTML, '')]

    children_filter = None
    factory = rule.get('children_filter_factory')
    if factory:
        children_filter = factory(e)

    yield from apply_prefix_postfix_rule(e, rule, 'prefix')
    yield from apply_strip_rule(e.text, rule)

    for child in e:
        if child.tag.replace(HTML, '') not in extra_rules:
            raise ValueError("unrecognized element: " + child.tag)

        texts = []
        for text in fragment_child_chunks(child, extra_rules):
            if children_filter:
                texts.append(text)
            else:
                yield text

        if children_filter:
            for text in children_filter(texts):
                yield text

    yield from apply_strip_rule(e.tail, rule)
    yield from apply_prefix_postfix_rule(e, rule, 'postfix')


def is_highlighted_ins(e):
    """Returns True if `e` matches the following pattern:

        <ins>highlighted</ins> text:

    See the `fragment_chunks` comment for details.
    """
    if len(e) != 0:
        return False

    if not e.text:
        return False

    if e.text != 'highlighted':
        return False

    if not e.tail:
        return False

    if not e.tail.startswith(' text:'):
        return False

    return True


def is_negligible_ins(e, extra_rules):
    """Returns True if the 'ignore_highlighted' rule is defined for <ins>
    and `e` matches the negligible pattern.

    See the `fragment_chunks` comment for details.
    """
    rule = extra_rules.get(e.tag.replace(HTML, ''))
    if not rule:
        return False

    if rule.get('ignore_highlighted'):
        if is_highlighted_ins(e):
            return True

    return False


def fragment_chunks(e, extra_rules={}):
    """Partly interpret the content of `e`, yielding pairs (ty, text).

    If `extra_rules` isn't provided, the content of `e` must be text with 0
    or more <ins>/<del> elements.

    The goal is to turn the tree `e` into a simple series of tagged strings.

    Yields pairs (ty, text) where ty in (INS, DEL, KEEP). Concatenating the
    yielded `text` values gives the full text of `e`.

    `extra_rules` is a dictionary that defines extra elements that are
    allowed as the content of `e`.
    Each item defines a rule for the tag, with the following keys:
    * prefix
        Put a prefix before the text
        Possible values:
        * string
        * list of strings
        * function
            receives `Element` and returns a prefix string
    * postfix
        Put a postfix after the text
        The value uses the same format as prefix
    * strip
        True to strip whitespace before/after the element's text
    * children_filter_factory
        A function that receives `Element`, and returns a filter function
        or None
        The filter function receives a list of texts for child nodes, and
        returns a list of filtered texts
    * ignore_highlighted
        Effective only with <ins>
        Do not treat <ins> as an insertion if it matches the following
        pattern:

            <ins>highlighted</ins> text:

        This pattern is used in the Annex B description.
    """

    rule = extra_rules.get(e.tag.replace(HTML, ''))

    for text in apply_prefix_postfix_rule(e, rule, 'prefix'):
        yield KEEP, text
    for text in apply_strip_rule(e.text, rule):
        yield KEEP, text

    for child in e:
        if child.tag == INS_TAG and not is_negligible_ins(child, extra_rules):
            ty = INS
        elif child.tag == DEL_TAG:
            ty = DEL
        else:
            if child.tag.replace(HTML, '') not in extra_rules:
                raise ValueError("unrecognized element: " + child.tag)

            for text in fragment_child_chunks(child, extra_rules):
                yield KEEP, text
            continue

        if child.text:
            yield ty, child.text
        if len(child) != 0:
            for grandchild in child:
                if grandchild.tag.replace(HTML, '') not in extra_rules:
                    raise ValueError("unsupported nested element {} in {}"
                                     .format(grandchild.tag, child.tag))

                for text in fragment_child_chunks(grandchild, extra_rules):
                    yield ty, text
        if child.tail:
            yield KEEP, child.tail

    for text in apply_strip_rule(e.tail, rule):
        yield KEEP, text
    for text in apply_prefix_postfix_rule(e, rule, 'postfix'):
        yield KEEP, text
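
# Minimal illustration (hypothetical markup): for an element containing
# "foo <ins>bar</ins> baz", fragment_chunks yields
# (KEEP, 'foo '), (INS, 'bar'), (KEEP, ' baz').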


def fragment_parts(e, **kwargs):
    """Like fragment_chunks, but with two fixups:

    1. Break up pairs that include both a newline and any other text.

    2. Move newlines inside of a preceding INS or DEL element that spans its
       whole line.
    """
    line_has_ins = False
    line_has_del = False
    for chunk_ty, text in fragment_chunks(e, **kwargs):
        for piece in re.split(r'(\n)', text):
            ty = chunk_ty
            if piece != '':
                if piece == '\n':
                    # Possibly move newline inside preceding INS or DEL.
                    if line_has_ins and not line_has_del:
                        ty = INS
                    elif line_has_del and not line_has_ins:
                        ty = DEL
                    else:
                        ty = KEEP
                    line_has_del = False
                    line_has_ins = False
                elif piece.strip() != '':
                    if ty in (INS, KEEP):
                        line_has_ins = True
                    if ty in (DEL, KEEP):
                        line_has_del = True
                yield ty, piece
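
# Illustration (hypothetical markup): for "<ins>added</ins>\n" the trailing
# newline chunk is re-tagged INS because the line contains only inserted
# text; this lets generate_fragment_patch emit it as a single "+" line.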


def generate_fragment_patch(e, **kwargs):
    """Turn the tagged parts of `e` into patch-style pairs (ty, line),
    where ty is KEEP (' '), DEL ('-'), or INS ('+').
    """
    line_before = ''
    line_after = ''

    def end_line(ty):
        nonlocal line_before, line_after
        if line_before.rstrip() == line_after.rstrip():
            yield " ", line_after
        else:
            if line_before.strip() != '' or ty != INS:
                yield "-", line_before
            if line_after.strip() != '' or ty != DEL:
                yield "+", line_after
        line_before = ''
        line_after = ''

    for ty, text in fragment_parts(e, **kwargs):
        if text == '\n':
            yield from end_line(ty)
        else:
            if ty in (KEEP, DEL):
                line_before += text
            if ty in (KEEP, INS):
                line_after += text
    if line_before or line_after:
        yield from end_line(KEEP)
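
# For example (hypothetical markup), a line "a <del>b</del><ins>c</ins>"
# is emitted as the patch-line pairs ('-', 'a b') and ('+', 'a c').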


def dedent_pairs(pairs):
    """Dedent the `text` part of each pair in `pairs`."""
    pairs = list(pairs)

    # Using textwrap.dedent on this requires a few lines of hackery:
    # dedent works on a single string, so join the lines, dedent, and
    # re-split, keeping the types aligned by index.
    types = [ty for ty, _line in pairs]
    dedented_lines = dedent(''.join(line + '\n' for ty, line in pairs)).splitlines()
    assert len(dedented_lines) == len(pairs)

    return zip(types, dedented_lines)


def print_pairs(pairs):
    """Print the (ty, line) pairs, collapsing runs of empty KEEP lines."""
    last_line_was_empty = False

    for ty, line in pairs:
        if ty == KEEP and line == '':
            if last_line_was_empty:
                continue
            last_line_was_empty = True
        else:
            last_line_was_empty = False

        print(ty + line)


def print_fragment_patch(e):
    print_pairs(dedent_pairs(generate_fragment_patch(e)))


def is_annex_early_errors(e):
    """Returns True if the <emu-annex> element contains Early Errors."""
    h1 = e.find('{http://www.w3.org/1999/xhtml}h1')
    if 'Early Errors' in h1.text:
        return True

    p = e.find('{http://www.w3.org/1999/xhtml}p')
    # Compare against None: an Element with no children is falsy, so a
    # plain `if p:` could skip a <p> that was actually found.
    if p is not None:
        if 'Early Error' in html5lib.serializer.serialize(p):
            return True

    return False


def get_parent_map(document):
    """Returns a map from each element to its parent element.

    This is necessary because `xml.etree.ElementTree.Element` doesn't have
    a reference to its parent element.
    """
    parent_map = dict()
    for parent in document.iter():
        for child in parent:
            parent_map[child] = parent
    return parent_map


def get_titles(parent_map, e):
    """Returns a list of section titles for a section, from the outermost
    ancestor down to the section itself.
    """
    titles = []
    while e.tag != '{http://www.w3.org/1999/xhtml}body':
        h1 = e.find('{http://www.w3.org/1999/xhtml}h1')
        titles.insert(0, h1.text)
        e = parent_map[e]

    return titles


def generate_ul_fragment_patch(e, depth):
    """Similar to generate_fragment_patch, but for <ul>."""
    first_line_prefix = '{}* '.format(' ' * depth)
    other_line_prefix = '{}  '.format(' ' * depth)

    for item in e:
        if item.tag != '{http://www.w3.org/1999/xhtml}li':
            raise ValueError("unrecognized element: " + item.tag)

        pairs = generate_fragment_patch(item,
                                        extra_rules=EXTRA_RULES_FOR_EE)

        is_first_line = True

        for ty, line in dedent_pairs(pairs):
            if is_first_line and line.strip() == '':
                continue

            if is_first_line:
                is_first_line = False
                yield ty, '{}{}'.format(first_line_prefix, line.strip())
            else:
                yield ty, '{}{}'.format(other_line_prefix, line.strip())


def generate_early_errors_fragment_patch(parent_map, e):
    for t in get_titles(parent_map, e):
        yield KEEP, '# {}'.format(t)
    yield KEEP, '# #{}'.format(e.attrib.get('id'))
    yield KEEP, ''

    for child in e:
        if child.tag == '{http://www.w3.org/1999/xhtml}h1':
            continue

        if child.tag == '{http://www.w3.org/1999/xhtml}emu-grammar':
            pairs = generate_fragment_patch(child)
            yield from dedent_pairs(pairs)
            yield KEEP, ''
        elif child.tag == '{http://www.w3.org/1999/xhtml}ul':
            yield from generate_ul_fragment_patch(child, 0)
        elif child.tag == '{http://www.w3.org/1999/xhtml}emu-note':
            pairs = generate_fragment_patch(child,
                                            extra_rules=EXTRA_RULES_FOR_EE)
            yield from dedent_pairs(pairs)
            yield KEEP, ''
        elif child.tag == '{http://www.w3.org/1999/xhtml}p':
            pairs = generate_fragment_patch(child,
                                            extra_rules=EXTRA_RULES_FOR_EE)
            yield from dedent_pairs(pairs)
            yield KEEP, ''
        elif (child.tag == '{http://www.w3.org/1999/xhtml}emu-alg'
              and e.attrib.get('id') == 'sec-__proto__-property-names-in-object-initializers'):
            # The "__proto__ Property Names in Object Initializers" section
            # contains changes both for early errors and the algorithm.
            # Ignore the algorithm part.
            pass
        else:
            raise ValueError('unsupported element in early errors section: {}'
                             .format(child.tag))


def print_early_errors(parent_map, e):
    pairs = generate_early_errors_fragment_patch(parent_map, e)
    print_pairs(dedent_pairs(pairs))


def extract(filename, unfiltered, target):
    if filename.startswith("https:"):
        file_obj = urllib.request.urlopen(filename)
    else:
        file_obj = open(filename, "rb")

    with file_obj:
        document = html5lib.parse(file_obj)

    if target == 'grammar':
        for e in document.iter("{http://www.w3.org/1999/xhtml}emu-grammar"):
            if unfiltered or e.attrib.get("type") == "definition":
                print_fragment_patch(e)
    elif target == 'ee':
        parent_map = get_parent_map(document)
        for e in document.iter("{http://www.w3.org/1999/xhtml}emu-clause"):
            # Default to '' so clauses without an id are skipped rather
            # than raising AttributeError.
            if e.attrib.get("id", "").endswith("-early-errors"):
                print_early_errors(parent_map, e)
    elif target == 'ee-annex':
        parent_map = get_parent_map(document)
        for e in document.iter("{http://www.w3.org/1999/xhtml}emu-annex"):
            if is_annex_early_errors(e):
                print_early_errors(parent_map, e)
    else:
        raise ValueError('Unknown target: {}'.format(target))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Extract esgrammar from ECMAScript specifications.")
    parser.add_argument(
        'url',
        nargs=1,
        help="the https: url or local filename of an HTML file containing <emu-grammar> tags")
    parser.add_argument(
        '--unfiltered',
        action='store_true',
        help="Include even <emu-grammar> elements that don't have `type=definition`")
    parser.add_argument(
        '--target',
        default='grammar',
        choices=['grammar', 'ee', 'ee-annex'],
        help="What to extract ("
             "grammar = esgrammar, "
             "ee = early errors, "
             "ee-annex = early errors in Annex)")

    args = parser.parse_args()
    extract(args.url[0], args.unfiltered, args.target)