forked from mirrors/gecko-dev
Add an --expected-failures option to `mach wpt-interop-score`. This takes the
path to a file which has the format:

    # Mark entire test as expected failure
    /test/id.html
    # Mark one subtest as expected failure
    /another/test/id.html,subtest name

The output is the actual interop score, plus two extra columns, one giving the
fraction of the score accounted for by expected failures, and one giving the
score after accounting for expected failures.

Differential Revision: https://phabricator.services.mozilla.com/D188729

302 lines
9.6 KiB
Python
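
A minimal usage sketch, assuming the mach command is wired to
get_parser_interop_score below (expected-failures.txt is a hypothetical file
name; the commit hash is the example from the parser's help text):

    ./mach wpt-interop-score --expected-failures expected-failures.txt \
        mozilla-central:fae24810aef1
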
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
import csv
import math
import os
import re
import shutil
import sys
import tempfile
from typing import Callable, Iterable, List, Mapping, Optional, Set, Tuple

repos = ["autoland", "mozilla-central", "try", "mozilla-beta", "wpt"]

default_fetch_task_filters = ["-web-platform-tests-|-spidermonkey-"]
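# Default task filters applied when scoring. The None entry is the fallback
# for any repo without its own entry: upstream wpt runs keep only -firefox-
# tasks, while Gecko repos keep linux64 opt web-platform-tests tasks,
# excluding the -nofis/-headless/-asan/-tsan/-ccov variants.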
default_interop_task_filters = {
    "wpt": ["-firefox-"],
    None: [
        "web-platform-tests",
        "linux.*-64",
        "/opt",
        "!-nofis|-headless|-asan|-tsan|-ccov",
    ],
}


def get_parser_fetch_logs() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--log-dir", action="store", help="Directory into which to download logs"
    )
    parser.add_argument(
        "--task-filter",
        dest="task_filters",
        action="append",
        help="Regex filter applied to task names. Filters starting ! must not match. Filters starting ^ (after any !) match the entire task name, otherwise any substring can match. Multiple filters must all match",
    )
    parser.add_argument(
        "--check-complete",
        action="store_true",
        help="Only download logs if the task is complete",
    )
    parser.add_argument(
        "commits",
        nargs="+",
        help="repo:commit e.g. mozilla-central:fae24810aef1 for the runs to include",
    )
    return parser


def get_parser_interop_score() -> argparse.ArgumentParser:
    parser = get_parser_fetch_logs()
    parser.add_argument(
        "--year",
        action="store",
        default=2023,
        type=int,
        help="Interop year to score against",
    )
    parser.add_argument(
        "--category-filter",
        action="append",
        dest="category_filters",
        help="Regex filter applied to category names. Filters starting ! must not match. Filters starting ^ (after any !) match the entire category name, otherwise any substring can match. Multiple filters must all match",
    )
    parser.add_argument(
        "--expected-failures",
        help="Path to a file containing a list of tests which are not expected to pass",
    )
    return parser


def print_scores(
    runs: Iterable[Tuple[str, str]],
    results_by_category: Mapping[str, List[int]],
    expected_failures_by_category: Optional[Mapping[str, List[Tuple[int, int]]]],
    include_total: bool,
):
    include_expected_failures = expected_failures_by_category is not None

    writer = csv.writer(sys.stdout, delimiter="\t")

    headers = ["Category"]
    for repo, commit in runs:
        prefix = f"{repo}:{commit}"
        headers.append(f"{prefix}-score")
        if include_expected_failures:
            headers.append(f"{prefix}-expected-failures")
            headers.append(f"{prefix}-adjusted-score")

    writer.writerow(headers)

    totals = {"score": [0.0] * len(runs)}
    if include_expected_failures:
        totals["expected_failures"] = [0.0] * len(runs)
        totals["adjusted_score"] = [0.0] * len(runs)

    for category, category_results in results_by_category.items():
        category_row = []
        category_row.append(category)
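        # Scores appear to be integers on a 0-1000 scale (an assumption
        # inferred from the "/ 10" and ".1f" formatting below); each run
        # contributes a score column, plus expected-failures and
        # adjusted-score columns when expected failures are given.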
        for run_index, run_score in enumerate(category_results):
            category_row.append(f"{run_score / 10:.1f}")
            totals["score"][run_index] += run_score
            if include_expected_failures:
                expected_failures, adjusted_score = expected_failures_by_category[
                    category
                ][run_index]
                category_row.append(f"{expected_failures / 10:.1f}")
                category_row.append(f"{adjusted_score / 10:.1f}")
                totals["expected_failures"][run_index] += expected_failures
                totals["adjusted_score"][run_index] += adjusted_score
        writer.writerow(category_row)

    if include_total:

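        # Totals are the mean of the per-category scores. The score and
        # adjusted score are floored to a whole unit before printing, while
        # the expected-failures fraction is not.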
        def get_total(score, floor=True):
            total = float(score) / (len(results_by_category))
            if floor:
                total = math.floor(total)
            total /= 10.0
            return total

        totals_row = ["Total"]
        for i in range(len(runs)):
            totals_row.append(f"{get_total(totals['score'][i]):.1f}")
            if include_expected_failures:
                totals_row.append(
                    f"{get_total(totals['expected_failures'][i], floor=False):.1f}"
                )
                totals_row.append(f"{get_total(totals['adjusted_score'][i]):.1f}")
        writer.writerow(totals_row)


def get_wptreports(
    repo: str, commit: str, task_filters: List[str], log_dir: str, check_complete: bool
) -> List[str]:
    import tcfetch

    return tcfetch.download_artifacts(
        repo,
        commit,
        task_filters=task_filters,
        check_complete=check_complete,
        out_dir=log_dir,
    )


def get_runs(commits: List[str]) -> List[Tuple[str, str]]:
    runs = []
    for item in commits:
        if ":" not in item:
            raise ValueError(f"Expected commits of the form repo:commit, got {item}")
        repo, commit = item.split(":", 1)
        if repo not in repos:
            raise ValueError(f"Unsupported repo {repo}")
        runs.append((repo, commit))
    return runs


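# Category filters use the same syntax as task filters: a leading "!" inverts
# the match, "^" (after any "!") anchors the regex at the start of the name,
# and anything else matches as a substring. A category is included only if
# every filter matches.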
def get_category_filter(
    category_filters: Optional[List[str]],
) -> Optional[Callable[[str], bool]]:
    if category_filters is None:
        return None

    filters = []
    for item in category_filters:
        if not item:
            continue
        invert = item[0] == "!"
        if invert:
            item = item[1:]
        if item[0] == "^":
            regex = re.compile(item)
        else:
            regex = re.compile(f"^(.*)(?:{item})")
        filters.append((regex, invert))

    def match_filters(category):
        for regex, invert in filters:
            matches = regex.match(category) is not None
            if invert:
                matches = not matches
            if not matches:
                return False
        return True

    return match_filters


def fetch_logs(
    commits: List[str],
    task_filters: List[str],
    log_dir: Optional[str],
    check_complete: bool,
    **kwargs,
):
    runs = get_runs(commits)

    if not task_filters:
        task_filters = default_fetch_task_filters

    if log_dir is None:
        log_dir = os.path.abspath(os.curdir)

    for repo, commit in runs:
        get_wptreports(repo, commit, task_filters, log_dir, check_complete)


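# The expected-failures file is parsed as CSV: lines starting with "#" are
# comments, a line with a single test id marks the whole test as an expected
# failure, and "test id,subtest name" marks a single subtest, e.g.
#
#   # Mark entire test as expected failure
#   /test/id.html
#   # Mark one subtest as expected failure
#   /another/test/id.html,subtest name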
def get_expected_failures(path: str) -> Mapping[str, Set[Optional[str]]]:
    expected_failures = {}
    with open(path) as f:
        for i, entry in enumerate(csv.reader(f)):
            entry = [item.strip() for item in entry]
            if not any(item for item in entry) or entry[0][0] == "#":
                continue
            if len(entry) > 2:
                raise ValueError(
                    f"{path}:{i+1} expected at most two columns, got {len(entry)}"
                )
            if entry[0][0] != "/":
                raise ValueError(
                    f'{path}:{i+1} "{entry[0]}" is not a valid test id (must start with "/")'
                )
            test_id = entry[0]
            if test_id not in expected_failures:
                expected_failures[test_id] = set()
            if len(entry) == 2:
                subtest_id = entry[1]
                if subtest_id == "":
                    print(
                        f"Warning: {path}:{i+1} got empty string subtest id, remove the trailing comma to make this apply to the full test"
                    )
                expected_failures[test_id].add(subtest_id)
            else:
                expected_failures[test_id].add(None)
    return expected_failures


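# Score one or more runs: download wptreport logs into log_dir (or a
# temporary directory that is removed afterwards) and print a per-category
# score table via print_scores.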
def score_runs(
    commits: List[str],
    task_filters: List[str],
    log_dir: Optional[str],
    year: int,
    check_complete: bool,
    category_filters: Optional[List[str]],
    expected_failures: Optional[str],
    **kwargs,
):
    from wpt_interop import score

    runs = get_runs(commits)

    temp_dir = None
    if log_dir is None:
        temp_dir = tempfile.mkdtemp()
        log_dir = temp_dir

    try:
        if expected_failures is not None:
            expected_failures_data = get_expected_failures(expected_failures)
        else:
            expected_failures_data = None

        run_logs = []
        for repo, commit in runs:
            if not task_filters:
                if repo in default_interop_task_filters:
                    filters = default_interop_task_filters[repo]
                else:
                    filters = default_interop_task_filters[None]
            else:
                filters = task_filters

            log_paths = get_wptreports(repo, commit, filters, log_dir, check_complete)
            if not log_paths:
                print(f"Failed to get any logs for {repo}:{commit}", file=sys.stderr)
            else:
                run_logs.append(log_paths)

        if not run_logs:
            print("No logs to process", file=sys.stderr)
            return

        include_total = category_filters is None

        category_filter = (
            get_category_filter(category_filters) if category_filters else None
        )

        scores, expected_failure_scores = score.score_wptreports(
            run_logs,
            year=year,
            category_filter=category_filter,
            expected_failures=expected_failures_data,
        )
        print_scores(runs, scores, expected_failure_scores, include_total)
    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)