forked from mirrors/gecko-dev
It is not at *all* clear how multiple optimizations for a single task should interact. No simple logical operation is right in all cases, and in fact in most imaginable cases the desired behavior turns out to be independent of all but one of the optimizations. For example, given both `seta` and `skip-unless-files-changed` optimizations, if SETA says to skip a test, it is low value and should be skipped regardless of what files have changed. But if SETA says to run a test, then it has likely been skipped in previous pushes, so it should be run regardless of what has changed in this push. This also adds a bit more output about optimization, that may be useful for anyone wondering why a particular job didn't run. MozReview-Commit-ID: 3OsvRnWjai4 --HG-- extra : rebase_source : ba0aa536e8c474b36c63d1447c83ed9885f1e3e6 extra : source : a3b7bdfdb116300daa3f49e0dfc96177e1369440
233 lines
8.8 KiB
Python
233 lines
8.8 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
from __future__ import absolute_import, print_function, unicode_literals
|
|
|
|
import logging
|
|
import os
|
|
import requests
|
|
from collections import defaultdict
|
|
|
|
from .graph import Graph
|
|
from . import files_changed
|
|
from .taskgraph import TaskGraph
|
|
from .util.seta import is_low_value_task
|
|
from .util.taskcluster import find_task_id
|
|
from .util.parameterization import resolve_task_references
|
|
from slugid import nice as slugid
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_optimizations = {}
|
|
|
|
|
|
def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tasks=None):
    """
    Optimize `target_task_graph`, never optimizing a task whose label is
    listed in `do_not_optimize`.

    Returns a tuple `(optimized_task_graph, label_to_taskid)`: the graph
    produced by `get_subgraph`, and the label -> taskId mapping that was
    filled in while optimizing.
    """
    links_by_label = target_task_graph.graph.named_links_dict()
    taskid_by_label = {}

    # Phase one: flag every task that is going to be dropped from the graph,
    # recording a replacement taskId for it where one exists.
    annotate_task_graph(
        target_task_graph=target_task_graph,
        params=params,
        do_not_optimize=do_not_optimize,
        named_links_dict=links_by_label,
        label_to_taskid=taskid_by_label,
        existing_tasks=existing_tasks)

    # Phase two: build a fresh graph holding only the tasks that survived,
    # with labels resolved to taskIds and task['dependencies'] filled in.
    optimized_graph = get_subgraph(target_task_graph, links_by_label, taskid_by_label)
    return optimized_graph, taskid_by_label
|
|
|
|
|
|
def optimize_task(task, params):
    """
    Run the optimization configured for the given task.

    `task.optimization` is either falsy (nothing configured) or a mapping
    whose first `{name: argument}` entry selects the optimization function
    registered under `name` in `_optimizations`.

    Returns False when no optimization is configured; otherwise returns
    whatever the selected optimization function returns when called with
    `(task, params, argument)`.

    Raises KeyError if `name` has not been registered.
    """
    if not task.optimization:
        return False
    # Dict views cannot be subscripted on Python 3, so pull the first entry
    # through an iterator; this behaves the same on Python 2.
    opt_type, arg = next(iter(task.optimization.items()))
    opt_fn = _optimizations[opt_type]
    return opt_fn(task, params, arg)
|
|
|
|
|
|
def annotate_task_graph(target_task_graph, params, do_not_optimize,
                        named_links_dict, label_to_taskid, existing_tasks):
    """
    Annotate each task in the graph with .optimized (boolean) and .task_id
    (possibly None).

    Tasks are handled in the order produced by the graph's
    `visit_postorder()`. For each task, the first matching rule applies:

      * its label is in `do_not_optimize`: the task is not optimized;
      * its label is in `existing_tasks`: the task is optimized and replaced
        by the taskId recorded there;
      * otherwise `optimize_task` decides: a falsy result keeps the task,
        True optimizes it away, and any other truthy result is used as the
        replacement taskId.

    As a side effect, label_to_taskid is updated with labels for all
    optimized tasks that are replaced with existing tasks, and a summary per
    optimization is logged at info level.

    Raises Exception if a task depends on a task that was optimized away
    without a replacement.
    """

    # set .optimized for all tasks, and .task_id for optimized tasks
    # with replacements
    opt_counts = defaultdict(lambda: {'away': 0, 'replaced': 0})
    for label in target_task_graph.graph.visit_postorder():
        task = target_task_graph.tasks[label]
        named_task_dependencies = named_links_dict.get(label, {})

        # check whether any dependencies have been optimized away; this reads
        # annotations set on earlier iterations (presumably dependencies are
        # visited before their dependents -- confirm against Graph)
        dependencies = [target_task_graph.tasks[dep_label]
                        for dep_label in named_task_dependencies.values()]
        for dep in dependencies:
            if dep.optimized and not dep.task_id:
                raise Exception(
                    "task {} was optimized away, but {} depends on it".format(
                        dep.label, label))

        # if this task is blacklisted, don't even consider optimizing
        replacement_task_id = None
        opt_by = None
        if label in do_not_optimize:
            optimized = False
        # Let's check whether this task has been created before
        elif existing_tasks is not None and label in existing_tasks:
            optimized = True
            replacement_task_id = existing_tasks[label]
            opt_by = "existing_tasks"
        # otherwise, examine the task itself (which may be an expensive operation)
        else:
            opt_result = optimize_task(task, params)

            # use opt_result to determine values for optimized, replacement_task_id
            optimized = bool(opt_result)
            if optimized:
                # name of the optimization that applied (first key of the mapping)
                opt_by = next(iter(task.optimization))
                replacement_task_id = opt_result if opt_result is not True else None

        task.optimized = optimized
        task.task_id = replacement_task_id
        if replacement_task_id:
            label_to_taskid[label] = replacement_task_id

        if optimized:
            if replacement_task_id:
                opt_counts[opt_by]['replaced'] += 1
                logger.debug("optimizing `{}`, replacing with task `{}`"
                             .format(label, replacement_task_id))
            else:
                opt_counts[opt_by]['away'] += 1
                logger.debug("optimizing `{}` away".format(label))
                # note: any dependent tasks will fail when they see this

    # An entry only exists once one of its counters has been incremented, so
    # the final branch below is reached only when both counters are non-zero.
    for opt_by in sorted(opt_counts):
        counts = opt_counts[opt_by]
        if counts['away'] and not counts['replaced']:
            msg = "optimized away {} tasks for {}: ".format(counts['away'], opt_by)
        elif counts['replaced'] and not counts['away']:
            # report the number of replaced tasks ('away' is zero in this branch)
            msg = "optimized {} tasks, replacing with other tasks, for {}: ".format(
                counts['replaced'], opt_by)
        else:
            msg = "optimized {} tasks for {}, replacing {} and optimizing {} away".format(
                sum(counts.values()), opt_by, counts['replaced'], counts['away'])
        logger.info(msg)
|
|
|
|
|
|
def get_subgraph(annotated_task_graph, named_links_dict, label_to_taskid):
    """
    Return the subgraph of annotated_task_graph consisting only of
    non-optimized tasks and edges between them.

    To avoid losing track of taskIds for tasks optimized away, this method
    simultaneously substitutes real taskIds for task labels in the graph, and
    populates each task definition's `dependencies` key with the appropriate
    taskIds. Task references are resolved in the process.

    Side effects: every non-optimized task gets a freshly generated
    `task_id`, which is also recorded in `label_to_taskid`, and its `task`
    definition is replaced by the result of `resolve_task_references`.

    Raises KeyError if a dependency's label has no entry in
    `label_to_taskid` at the time its dependent is processed.
    """

    # resolve labels to taskIds and populate task['dependencies']
    tasks_by_taskid = {}
    for label in annotated_task_graph.graph.visit_postorder():
        task = annotated_task_graph.tasks[label]
        if task.optimized:
            continue
        task.task_id = label_to_taskid[label] = slugid()
        # map each dependency name to the taskId of the task it points at;
        # .items()/.values() work on both Python 2 and Python 3
        named_task_dependencies = {
            name: label_to_taskid[dep_label]
            for name, dep_label in named_links_dict.get(label, {}).items()}
        task.task = resolve_task_references(task.label, task.task, named_task_dependencies)
        task.task.setdefault('dependencies', []).extend(named_task_dependencies.values())
        tasks_by_taskid[task.task_id] = task

    # resolve edges to taskIds
    edges_by_taskid = (
        (label_to_taskid.get(left), label_to_taskid.get(right), name)
        for (left, right, name) in annotated_task_graph.graph.edges
    )
    # ..and drop edges that are no longer in the task graph
    edges_by_taskid = set(
        (left, right, name)
        for (left, right, name) in edges_by_taskid
        if left in tasks_by_taskid and right in tasks_by_taskid
    )

    return TaskGraph(
        tasks_by_taskid,
        Graph(set(tasks_by_taskid), edges_by_taskid))
|
|
|
|
|
|
def optimization(name):
    """
    Decorator factory that records the decorated function in
    `_optimizations` under `name` and hands the function back unchanged.

    Raises Exception when `name` is already registered.
    """
    def register(opt_fn):
        already_registered = name in _optimizations
        if already_registered:
            raise Exception("multiple optimizations with name {}".format(name))
        _optimizations[name] = opt_fn
        return opt_fn

    return register
|
|
|
|
|
|
@optimization('index-search')
def opt_index_search(task, params, index_paths):
    """
    Try each index path in order and return the taskId from the first lookup
    that succeeds; return False when every lookup raised an HTTP error (or
    when there are no paths).
    """
    for path in index_paths:
        try:
            found = find_task_id(
                path,
                use_proxy=bool(os.environ.get('TASK_ID')))
        except requests.exceptions.HTTPError:
            # an HTTP error (a 404, for instance) means this path yielded
            # nothing; move on to the next index path
            continue
        return found

    return False
|
|
|
|
|
|
@optimization('seta')
def opt_seta(task, params, _):
    """
    Return True (optimize the task away) when `is_low_value_task` reports
    the task as low value, and False otherwise.
    """
    is_bbb = task.task.get('provisionerId', '') == 'buildbot-bridge'

    # buildbot-bridge tasks are looked up by their buildbot buildername;
    # everything else is looked up by task label
    if is_bbb:
        seta_label = task.task.get('payload').get('buildername')
    else:
        seta_label = task.label

    low_value = is_low_value_task(
        seta_label,
        params.get('project'),
        params.get('pushlog_id'),
        params.get('pushdate'),
        is_bbb)

    # low-value tasks are always optimized away; anything else is kept
    return bool(low_value)
|
|
|
|
|
|
@optimization('skip-unless-changed')
def opt_files_changed(task, params, file_patterns):
    """
    Return True (optimize the task away) unless `files_changed.check`
    reports a match for `file_patterns`; always True when the pushlog_id
    is -1.
    """
    # a pushlog_id of -1 is what a run started from a cron.yml job carries
    from_cron = params.get('pushlog_id') == -1
    if from_cron:
        return True

    if files_changed.check(params, file_patterns):
        return False

    logger.debug('no files found matching a pattern in `skip-unless-changed` for ' +
                 task.label)
    return True
|