fune/taskcluster/taskgraph/optimize/seta.py
Andrew Halberstadt 3362e14674 Bug 1625200 - [taskgraph] Pull the 10th push backstop out of SETA, r=tomprince
We'll want some kind of backstop no matter what optimization algorithm we use.
We don't want to go too long without running any given task so we can find
regressions quickly and have a good merge candidate.

This pulls the logic that handles this out of the SETA strategy and into its
own strategy.

This will also make the SETA shadow scheduler more representative of what the
algorithm is doing.

Note in the future we may find ways to make this backstop more efficient (i.e
only run tasks that didn't run in the last 9 pushes for example).

Depends on D68621

Differential Revision: https://phabricator.services.mozilla.com/D68622

--HG--
extra : moz-landing-system : lando
2020-04-15 19:45:34 +00:00

183 lines
7.7 KiB
Python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import json
import logging
import attr
import requests
from redo import retry
from requests import exceptions
from taskgraph.optimize import OptimizationStrategy, register_strategy
logger = logging.getLogger(__name__)
# It's a list of project name which SETA is useful on
SETA_PROJECTS = ['autoland', 'try']
SETA_HIGH_PRIORITY = 1
SETA_LOW_PRIORITY = 5
SETA_ENDPOINT = "https://treeherder.mozilla.org/api/project/%s/seta/" \
"job-priorities/?build_system_type=%s&priority=%s"
@attr.s(frozen=True)
class SETA(object):
"""
Interface to the SETA service, which defines low-value tasks that can be optimized out
of the taskgraph.
"""
# cached low value tasks, by project
low_value_tasks = attr.ib(factory=dict, init=False)
low_value_bb_tasks = attr.ib(factory=dict, init=False)
def _get_task_string(self, task_tuple):
# convert task tuple to single task string, so the task label sent in can match
# remove any empty parts of the tuple
task_tuple = [x for x in task_tuple if len(x) != 0]
if len(task_tuple) == 0:
return ''
if len(task_tuple) != 3:
return ' '.join(task_tuple)
return 'test-%s/%s-%s' % (task_tuple[0], task_tuple[1], task_tuple[2])
def query_low_value_tasks(self, project):
# Request the set of low value tasks from the SETA service. Low value
# tasks will be optimized out of the task graph.
low_value_tasks = set()
# we want to get low priority taskcluster jobs
url_low = SETA_ENDPOINT % (project, 'taskcluster', SETA_LOW_PRIORITY)
url_high = SETA_ENDPOINT % (project, 'taskcluster', SETA_HIGH_PRIORITY)
# Try to fetch the SETA data twice, falling back to an empty list of low value tasks.
# There are 10 seconds between each try.
try:
logger.debug("Retrieving low-value jobs list from SETA")
response = retry(requests.get, attempts=2, sleeptime=10,
args=(url_low, ),
kwargs={'timeout': 60, 'headers': ''})
task_list = json.loads(response.content).get('jobtypes', '')
if type(task_list) == dict and len(task_list) > 0:
if type(task_list.values()[0]) == list and len(task_list.values()[0]) > 0:
low_value_tasks = set(task_list.values()[0])
# hack seta tasks to run 'opt' jobs on 'pgo' builds - see Bug 1522111
logger.debug("Retrieving high-value jobs list from SETA")
response = retry(requests.get, attempts=2, sleeptime=10,
args=(url_high, ),
kwargs={'timeout': 60, 'headers': ''})
task_list = json.loads(response.content).get('jobtypes', '')
high_value_tasks = set()
if type(task_list) == dict and len(task_list) > 0:
if type(task_list.values()[0]) == list and len(task_list.values()[0]) > 0:
high_value_tasks = set(task_list.values()[0])
# hack seta to treat all Android Raptor tasks as low value - see Bug 1535016
def only_android_raptor(task):
return task.startswith('test-android') and 'raptor' in task
high_value_android_tasks = set(filter(only_android_raptor, high_value_tasks))
low_value_tasks.update(high_value_android_tasks)
seta_conversions = {
# old: new
'test-linux64/opt': 'test-linux64-shippable/opt',
'test-linux64-qr/opt': 'test-linux64-shippable-qr/opt',
'test-windows7-32/opt': 'test-windows7-32-shippable/opt',
'test-windows10-64/opt': 'test-windows10-64-shippable/opt',
'test-windows10-64-qr/opt': 'test-windows10-64-shippable-qr/opt',
}
# Now add new variants to the low-value set
for old, new in seta_conversions.iteritems():
if any(t.startswith(old) for t in low_value_tasks):
low_value_tasks.update(
[t.replace(old, new) for t in low_value_tasks]
)
# ... and the high value list
for old, new in seta_conversions.iteritems():
if any(t.startswith(old) for t in high_value_tasks):
high_value_tasks.update(
[t.replace(old, new) for t in high_value_tasks]
)
def new_as_old_is_high_value(label):
# This doesn't care if there are multiple old values for one new
# it will always check every old value.
for old, new in seta_conversions.iteritems():
if label.startswith(new):
old_label = label.replace(new, old)
if old_label in high_value_tasks:
return True
return False
# Now rip out from low value things that were high value in opt
low_value_tasks = {
x for x in low_value_tasks if not new_as_old_is_high_value(x)
}
# ensure no non-fuzzing build tasks slipped in, we never want to optimize out those
low_value_tasks = {
x for x in low_value_tasks if 'build' not in x or 'fuzzing' in x
}
# In the event of request times out, requests will raise a TimeoutError.
except exceptions.Timeout:
logger.warning("SETA timeout, we will treat all test tasks as high value.")
# In the event of a network problem (e.g. DNS failure, refused connection, etc),
# requests will raise a ConnectionError.
except exceptions.ConnectionError:
logger.warning("SETA connection error, we will treat all test tasks as high value.")
# In the event of the rare invalid HTTP response(e.g 404, 401),
# requests will raise an HTTPError exception
except exceptions.HTTPError:
logger.warning("We got bad Http response from ouija,"
" we will treat all test tasks as high value.")
# We just print the error out as a debug message if we failed to catch the exception above
except exceptions.RequestException as error:
logger.warning(error)
# When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
except ValueError as error:
logger.warning("Invalid JSON, possible server error: {}".format(error))
return low_value_tasks
def is_low_value_task(self, label, project):
# marking a task as low_value means it will be optimized out by tc
if project not in SETA_PROJECTS:
return False
# The SETA service has a superficial check preventing try, so spoof autoland
project = 'autoland'
# cache the low value tasks per project to avoid repeated SETA server queries
if project not in self.low_value_tasks:
self.low_value_tasks[project] = self.query_low_value_tasks(project)
return label in self.low_value_tasks[project]
# create a single instance of this class, and expose its `is_low_value_task`
# bound method as a module-level function
is_low_value_task = SETA().is_low_value_task
@register_strategy('seta')
class SkipLowValue(OptimizationStrategy):
def should_remove_task(self, task, params, _):
# Return True to optimize a low value task.
return is_low_value_task(task.label, params['project'])