forked from mirrors/gecko-dev
Backed out changeset 332ce0963b4e (bug 1633039)
Backed out changeset a9904cbc40d9 (bug 1633037)
Backed out changeset d06b0ec349f8 (bug 1599658)
Backed out changeset 8fd300cad80f (bug 1633016)
Backed out changeset f8820941c703 (bug 1632916)
Backed out changeset ac9c2c8746ed (bug 1632920)
399 lines
14 KiB
Python
399 lines
14 KiB
Python
import itertools
|
|
import json
|
|
import os
|
|
from copy import deepcopy
|
|
from multiprocessing import Pool, cpu_count
|
|
from six import PY3, iteritems, itervalues, string_types, binary_type, text_type
|
|
|
|
from . import vcs
|
|
from .item import (ConformanceCheckerTest, ManifestItem, ManualTest, RefTest, SupportFile,
|
|
TestharnessTest, VisualTest, WebDriverSpecTest, CrashTest)
|
|
from .log import get_logger
|
|
from .sourcefile import SourceFile
|
|
from .typedata import TypeData
|
|
|
|
MYPY = False
|
|
if MYPY:
|
|
# MYPY is set to True when run under Mypy.
|
|
from logging import Logger
|
|
from typing import Any
|
|
from typing import Container
|
|
from typing import Dict
|
|
from typing import IO
|
|
from typing import Iterator
|
|
from typing import Iterable
|
|
from typing import Optional
|
|
from typing import Set
|
|
from typing import Text
|
|
from typing import Tuple
|
|
from typing import Type
|
|
from typing import Union
|
|
|
|
try:
|
|
import ujson
|
|
fast_json = ujson
|
|
except ImportError:
|
|
fast_json = json # type: ignore
|
|
|
|
CURRENT_VERSION = 8 # type: int
|
|
|
|
|
|
class ManifestError(Exception):
|
|
pass
|
|
|
|
|
|
class ManifestVersionMismatch(ManifestError):
|
|
pass
|
|
|
|
|
|
item_classes = {"testharness": TestharnessTest,
|
|
"reftest": RefTest,
|
|
"crashtest": CrashTest,
|
|
"manual": ManualTest,
|
|
"wdspec": WebDriverSpecTest,
|
|
"conformancechecker": ConformanceCheckerTest,
|
|
"visual": VisualTest,
|
|
"support": SupportFile} # type: Dict[str, Type[ManifestItem]]
|
|
|
|
|
|
def compute_manifest_items(source_file):
|
|
# type: (SourceFile) -> Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]
|
|
rel_path_parts = source_file.rel_path_parts
|
|
new_type, manifest_items = source_file.manifest_items()
|
|
file_hash = source_file.hash
|
|
return rel_path_parts, new_type, set(manifest_items), file_hash
|
|
|
|
if MYPY:
|
|
ManifestDataType = Dict[Any, TypeData]
|
|
else:
|
|
ManifestDataType = dict
|
|
|
|
class ManifestData(ManifestDataType):
|
|
def __init__(self, manifest):
|
|
# type: (Manifest) -> None
|
|
"""Dictionary subclass containing a TypeData instance for each test type,
|
|
keyed by type name"""
|
|
self.initialized = False # type: bool
|
|
for key, value in iteritems(item_classes):
|
|
self[key] = TypeData(manifest, value)
|
|
self.initialized = True
|
|
self.json_obj = None # type: None
|
|
|
|
def __setitem__(self, key, value):
|
|
# type: (str, TypeData) -> None
|
|
if self.initialized:
|
|
raise AttributeError
|
|
dict.__setitem__(self, key, value)
|
|
|
|
def paths(self):
|
|
# type: () -> Set[Text]
|
|
"""Get a list of all paths containing test items
|
|
without actually constructing all the items"""
|
|
rv = set() # type: Set[Text]
|
|
for item_data in itervalues(self):
|
|
for item in item_data:
|
|
rv.add(os.path.sep.join(item))
|
|
return rv
|
|
|
|
def type_by_path(self):
|
|
# type: () -> Dict[Tuple[Text, ...], str]
|
|
rv = {}
|
|
for item_type, item_data in iteritems(self):
|
|
for item in item_data:
|
|
rv[item] = item_type
|
|
return rv
|
|
|
|
|
|
|
|
class Manifest(object):
|
|
def __init__(self, tests_root=None, url_base="/"):
|
|
# type: (Optional[str], Text) -> None
|
|
assert url_base is not None
|
|
self._data = ManifestData(self) # type: ManifestData
|
|
self.tests_root = tests_root # type: Optional[str]
|
|
self.url_base = url_base # type: Text
|
|
|
|
def __iter__(self):
|
|
# type: () -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
|
|
return self.itertypes()
|
|
|
|
def itertypes(self, *types):
|
|
# type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
|
|
for item_type in (types or sorted(self._data.keys())):
|
|
for path in self._data[item_type]:
|
|
str_path = os.sep.join(path)
|
|
tests = self._data[item_type][path]
|
|
yield item_type, str_path, tests
|
|
|
|
def iterpath(self, path):
|
|
# type: (Text) -> Iterable[ManifestItem]
|
|
tpath = tuple(path.split(os.path.sep))
|
|
|
|
for type_tests in self._data.values():
|
|
i = type_tests.get(tpath, set())
|
|
assert i is not None
|
|
for test in i:
|
|
yield test
|
|
|
|
def iterdir(self, dir_name):
|
|
# type: (Text) -> Iterable[ManifestItem]
|
|
tpath = tuple(dir_name.split(os.path.sep))
|
|
tpath_len = len(tpath)
|
|
|
|
for type_tests in self._data.values():
|
|
for path, tests in iteritems(type_tests):
|
|
if path[:tpath_len] == tpath:
|
|
for test in tests:
|
|
yield test
|
|
|
|
def update(self, tree, parallel=True):
|
|
# type: (Iterable[Tuple[Union[SourceFile, bytes], bool]], bool) -> bool
|
|
"""Update the manifest given an iterable of items that make up the updated manifest.
|
|
|
|
The iterable must either generate tuples of the form (SourceFile, True) for paths
|
|
that are to be updated, or (path, False) for items that are not to be updated. This
|
|
unusual API is designed as an optimistaion meaning that SourceFile items need not be
|
|
constructed in the case we are not updating a path, but the absence of an item from
|
|
the iterator may be used to remove defunct entries from the manifest."""
|
|
|
|
changed = False
|
|
|
|
# Create local variable references to these dicts so we avoid the
|
|
# attribute access in the hot loop below
|
|
data = self._data
|
|
|
|
types = data.type_by_path()
|
|
deleted = set(types)
|
|
|
|
to_update = []
|
|
|
|
for source_file, update in tree:
|
|
if not update:
|
|
assert isinstance(source_file, (binary_type, text_type))
|
|
deleted.remove(tuple(source_file.split(os.path.sep)))
|
|
else:
|
|
assert not isinstance(source_file, bytes)
|
|
rel_path_parts = source_file.rel_path_parts
|
|
assert isinstance(rel_path_parts, tuple)
|
|
|
|
is_new = rel_path_parts not in deleted # type: bool
|
|
hash_changed = False # type: bool
|
|
|
|
if not is_new:
|
|
deleted.remove(rel_path_parts)
|
|
old_type = types[rel_path_parts]
|
|
old_hash = data[old_type].hashes[rel_path_parts]
|
|
file_hash = source_file.hash # type: Text
|
|
if old_hash != file_hash:
|
|
hash_changed = True
|
|
del data[old_type][rel_path_parts]
|
|
|
|
if is_new or hash_changed:
|
|
to_update.append(source_file)
|
|
|
|
if to_update:
|
|
changed = True
|
|
|
|
if parallel and len(to_update) > 25 and cpu_count() > 1:
|
|
# 25 derived experimentally (2020-01) to be approximately
|
|
# the point at which it is quicker to create Pool and
|
|
# parallelize this
|
|
pool = Pool()
|
|
|
|
# chunksize set > 1 when more than 10000 tests, because
|
|
# chunking is a net-gain once we get to very large numbers
|
|
# of items (again, experimentally, 2020-01)
|
|
results = pool.imap_unordered(compute_manifest_items,
|
|
to_update,
|
|
chunksize=max(1, len(to_update) // 10000)
|
|
) # type: Iterator[Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]]
|
|
elif PY3:
|
|
results = map(compute_manifest_items, to_update)
|
|
else:
|
|
results = itertools.imap(compute_manifest_items, to_update)
|
|
|
|
for result in results:
|
|
rel_path_parts, new_type, manifest_items, file_hash = result
|
|
data[new_type][rel_path_parts] = manifest_items
|
|
data[new_type].hashes[rel_path_parts] = file_hash
|
|
|
|
if deleted:
|
|
changed = True
|
|
for rel_path_parts in deleted:
|
|
for test_data in itervalues(data):
|
|
if rel_path_parts in test_data:
|
|
del test_data[rel_path_parts]
|
|
|
|
return changed
|
|
|
|
def to_json(self, caller_owns_obj=True):
|
|
# type: (bool) -> Dict[Text, Any]
|
|
"""Dump a manifest into a object which can be serialized as JSON
|
|
|
|
If caller_owns_obj is False, then the return value remains
|
|
owned by the manifest; it is _vitally important_ that _no_
|
|
(even read) operation is done on the manifest, as otherwise
|
|
objects within the object graph rooted at the return value can
|
|
be mutated. This essentially makes this mode very dangerous
|
|
and only to be used under extreme care.
|
|
|
|
"""
|
|
out_items = {
|
|
test_type: type_paths.to_json()
|
|
for test_type, type_paths in iteritems(self._data) if type_paths
|
|
}
|
|
|
|
if caller_owns_obj:
|
|
out_items = deepcopy(out_items)
|
|
|
|
rv = {"url_base": self.url_base,
|
|
"items": out_items,
|
|
"version": CURRENT_VERSION} # type: Dict[Text, Any]
|
|
return rv
|
|
|
|
@classmethod
|
|
def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False):
|
|
# type: (str, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest
|
|
"""Load a manifest from a JSON object
|
|
|
|
This loads a manifest for a given local test_root path from an
|
|
object obj, potentially partially loading it to only load the
|
|
types given by types.
|
|
|
|
If callee_owns_obj is True, then ownership of obj transfers
|
|
to this function when called, and the caller must never mutate
|
|
the obj or anything referred to in the object graph rooted at
|
|
obj.
|
|
|
|
"""
|
|
version = obj.get("version")
|
|
if version != CURRENT_VERSION:
|
|
raise ManifestVersionMismatch
|
|
|
|
self = cls(tests_root, url_base=obj.get("url_base", "/"))
|
|
if not hasattr(obj, "items"):
|
|
raise ManifestError
|
|
|
|
for test_type, type_paths in iteritems(obj["items"]):
|
|
if test_type not in item_classes:
|
|
raise ManifestError
|
|
|
|
if types and test_type not in types:
|
|
continue
|
|
|
|
if not callee_owns_obj:
|
|
type_paths = deepcopy(type_paths)
|
|
|
|
self._data[test_type].set_json(type_paths)
|
|
|
|
return self
|
|
|
|
|
|
def load(tests_root, manifest, types=None):
|
|
# type: (str, Union[IO[bytes], str], Optional[Container[Text]]) -> Optional[Manifest]
|
|
logger = get_logger()
|
|
|
|
logger.warning("Prefer load_and_update instead")
|
|
return _load(logger, tests_root, manifest, types)
|
|
|
|
|
|
__load_cache = {} # type: Dict[str, Manifest]
|
|
|
|
|
|
def _load(logger, # type: Logger
|
|
tests_root, # type: str
|
|
manifest, # type: Union[IO[bytes], str]
|
|
types=None, # type: Optional[Container[Text]]
|
|
allow_cached=True # type: bool
|
|
):
|
|
# type: (...) -> Optional[Manifest]
|
|
manifest_path = (manifest if isinstance(manifest, string_types)
|
|
else manifest.name)
|
|
if allow_cached and manifest_path in __load_cache:
|
|
return __load_cache[manifest_path]
|
|
|
|
if isinstance(manifest, string_types):
|
|
if os.path.exists(manifest):
|
|
logger.debug("Opening manifest at %s" % manifest)
|
|
else:
|
|
logger.debug("Creating new manifest at %s" % manifest)
|
|
try:
|
|
with open(manifest, "rb") as f:
|
|
rv = Manifest.from_json(tests_root,
|
|
fast_json.load(f),
|
|
types=types,
|
|
callee_owns_obj=True)
|
|
except IOError:
|
|
return None
|
|
except ValueError:
|
|
logger.warning("%r may be corrupted", manifest)
|
|
return None
|
|
else:
|
|
rv = Manifest.from_json(tests_root,
|
|
fast_json.load(manifest),
|
|
types=types,
|
|
callee_owns_obj=True)
|
|
|
|
if allow_cached:
|
|
__load_cache[manifest_path] = rv
|
|
return rv
|
|
|
|
|
|
def load_and_update(tests_root, # type: bytes
|
|
manifest_path, # type: bytes
|
|
url_base, # type: Text
|
|
update=True, # type: bool
|
|
rebuild=False, # type: bool
|
|
metadata_path=None, # type: Optional[bytes]
|
|
cache_root=None, # type: Optional[bytes]
|
|
working_copy=True, # type: bool
|
|
types=None, # type: Optional[Container[Text]]
|
|
write_manifest=True, # type: bool
|
|
allow_cached=True, # type: bool
|
|
parallel=True # type: bool
|
|
):
|
|
# type: (...) -> Manifest
|
|
logger = get_logger()
|
|
|
|
manifest = None
|
|
if not rebuild:
|
|
try:
|
|
manifest = _load(logger,
|
|
tests_root,
|
|
manifest_path,
|
|
types=types,
|
|
allow_cached=allow_cached)
|
|
except ManifestVersionMismatch:
|
|
logger.info("Manifest version changed, rebuilding")
|
|
|
|
if manifest is not None and manifest.url_base != url_base:
|
|
logger.info("Manifest url base did not match, rebuilding")
|
|
manifest = None
|
|
|
|
if manifest is None:
|
|
manifest = Manifest(tests_root, url_base)
|
|
rebuild = True
|
|
update = True
|
|
|
|
if rebuild or update:
|
|
tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
|
|
working_copy, rebuild)
|
|
changed = manifest.update(tree, parallel)
|
|
if write_manifest and changed:
|
|
write(manifest, manifest_path)
|
|
tree.dump_caches()
|
|
|
|
return manifest
|
|
|
|
|
|
def write(manifest, manifest_path):
|
|
# type: (Manifest, bytes) -> None
|
|
dir_name = os.path.dirname(manifest_path)
|
|
if not os.path.exists(dir_name):
|
|
os.makedirs(dir_name)
|
|
with open(manifest_path, "w") as f:
|
|
# Use ',' instead of the default ', ' separator to prevent trailing
|
|
# spaces: https://docs.python.org/2/library/json.html#json.dump
|
|
json.dump(manifest.to_json(caller_owns_obj=True), f,
|
|
sort_keys=True, indent=1, separators=(',', ': '))
|
|
f.write("\n")
|