fune/taskcluster/taskgraph/optimize/seta.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals

import json
import logging

import attr
import requests
from redo import retry
from requests import exceptions

from taskgraph.optimize import OptimizationStrategy, register_strategy

logger = logging.getLogger(__name__)

# It's a list of project name which SETA is useful on
SETA_PROJECTS = ['autoland', 'try']
SETA_HIGH_PRIORITY = 1
SETA_LOW_PRIORITY = 5

SETA_ENDPOINT = "https://treeherder.mozilla.org/api/project/%s/seta/" \
                "job-priorities/?build_system_type=%s&priority=%s"


@attr.s(frozen=True)
class SETA(object):
    """
    Interface to the SETA service, which defines low-value tasks that can be optimized out
    of the taskgraph.
    """

    # cached low value tasks, by project
    low_value_tasks = attr.ib(factory=dict, init=False)
    low_value_bb_tasks = attr.ib(factory=dict, init=False)

    def _get_task_string(self, task_tuple):
        # convert task tuple to single task string, so the task label sent in can match
        # remove any empty parts of the tuple
        task_tuple = [x for x in task_tuple if len(x) != 0]

        if len(task_tuple) == 0:
            return ''
        if len(task_tuple) != 3:
            return ' '.join(task_tuple)

        return 'test-%s/%s-%s' % (task_tuple[0], task_tuple[1], task_tuple[2])

    def query_low_value_tasks(self, project):
        # Request the set of low value tasks from the SETA service.  Low value
        # tasks will be optimized out of the task graph.
        low_value_tasks = set()

        # we want to get low priority taskcluster jobs
        url_low = SETA_ENDPOINT % (project, 'taskcluster', SETA_LOW_PRIORITY)
        url_high = SETA_ENDPOINT % (project, 'taskcluster', SETA_HIGH_PRIORITY)

        # Try to fetch the SETA data twice, falling back to an empty list of low value tasks.
        # There are 10 seconds between each try.
        try:
            logger.debug("Retrieving low-value jobs list from SETA")
            response = retry(requests.get, attempts=2, sleeptime=10,
                             args=(url_low, ),
                             kwargs={'timeout': 60, 'headers': ''})
            task_list = json.loads(response.content).get('jobtypes', '')

            if type(task_list) == dict and len(task_list) > 0:
                if type(task_list.values()[0]) == list and len(task_list.values()[0]) > 0:
                    low_value_tasks = set(task_list.values()[0])

            # hack seta tasks to run 'opt' jobs on 'pgo' builds - see Bug 1522111
            logger.debug("Retrieving high-value jobs list from SETA")
            response = retry(requests.get, attempts=2, sleeptime=10,
                             args=(url_high, ),
                             kwargs={'timeout': 60, 'headers': ''})
            task_list = json.loads(response.content).get('jobtypes', '')

            high_value_tasks = set()
            if type(task_list) == dict and len(task_list) > 0:
                if type(task_list.values()[0]) == list and len(task_list.values()[0]) > 0:
                    high_value_tasks = set(task_list.values()[0])

            # hack seta to treat all Android Raptor tasks as low value - see Bug 1535016
            def only_android_raptor(task):
                return task.startswith('test-android') and 'raptor' in task

            high_value_android_tasks = set(filter(only_android_raptor, high_value_tasks))
            low_value_tasks.update(high_value_android_tasks)

            seta_conversions = {
                # old: new
                'test-linux64/opt': 'test-linux64-shippable/opt',
                'test-linux64-qr/opt': 'test-linux64-shippable-qr/opt',
                'test-windows7-32/opt': 'test-windows7-32-shippable/opt',
                'test-windows10-64/opt': 'test-windows10-64-shippable/opt',
                'test-windows10-64-qr/opt': 'test-windows10-64-shippable-qr/opt',
                }
            # Now add new variants to the low-value set
            for old, new in seta_conversions.iteritems():
                if any(t.startswith(old) for t in low_value_tasks):
                    low_value_tasks.update(
                        [t.replace(old, new) for t in low_value_tasks]
                    )

            # ... and the high value list
            for old, new in seta_conversions.iteritems():
                if any(t.startswith(old) for t in high_value_tasks):
                    high_value_tasks.update(
                        [t.replace(old, new) for t in high_value_tasks]
                    )

            def new_as_old_is_high_value(label):
                # This doesn't care if there are multiple old values for one new
                # it will always check every old value.
                for old, new in seta_conversions.iteritems():
                    if label.startswith(new):
                        old_label = label.replace(new, old)
                        if old_label in high_value_tasks:
                            return True
                return False

            # Now rip out from low value things that were high value in opt
            low_value_tasks = {
                x for x in low_value_tasks if not new_as_old_is_high_value(x)
            }

            # ensure no non-fuzzing build tasks slipped in, we never want to optimize out those
            low_value_tasks = {
                x for x in low_value_tasks if 'build' not in x or 'fuzzing' in x
            }

        # In the event of request times out, requests will raise a TimeoutError.
        except exceptions.Timeout:
            logger.warning("SETA timeout, we will treat all test tasks as high value.")

        # In the event of a network problem (e.g. DNS failure, refused connection, etc),
        # requests will raise a ConnectionError.
        except exceptions.ConnectionError:
            logger.warning("SETA connection error, we will treat all test tasks as high value.")

        # In the event of the rare invalid HTTP response(e.g 404, 401),
        # requests will raise an HTTPError exception
        except exceptions.HTTPError:
            logger.warning("We got bad Http response from ouija,"
                           " we will treat all test tasks as high value.")

        # We just print the error out as a debug message if we failed to catch the exception above
        except exceptions.RequestException as error:
            logger.warning(error)

        # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
        except ValueError as error:
            logger.warning("Invalid JSON, possible server error: {}".format(error))

        return low_value_tasks

    def is_low_value_task(self, label, project):
        # marking a task as low_value means it will be optimized out by tc
        if project not in SETA_PROJECTS:
            return False

        # The SETA service has a superficial check preventing try, so spoof autoland
        project = 'autoland'

        # cache the low value tasks per project to avoid repeated SETA server queries
        if project not in self.low_value_tasks:
            self.low_value_tasks[project] = self.query_low_value_tasks(project)
        return label in self.low_value_tasks[project]


# create a single instance of this class, and expose its `is_low_value_task`
# bound method as a module-level function
is_low_value_task = SETA().is_low_value_task


@register_strategy('seta')
class SkipLowValue(OptimizationStrategy):

    def should_remove_task(self, task, params, _):
        # Return True to optimize a low value task.
        return is_low_value_task(task.label, params['project'])