forked from mirrors/gecko-dev
		
	Instead of a boolean, it's now a set of projects for which tasks should be removed. If the project doesn't match the specified set, the task will be kept. This ensures tasks that have these optimizers applied won't run on |mach try auto|. Differential Revision: https://phabricator.services.mozilla.com/D76987
		
			
				
	
	
		
			139 lines
		
	
	
	
		
			6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			139 lines
		
	
	
	
		
			6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# This Source Code Form is subject to the terms of the Mozilla Public
 | 
						|
# License, v. 2.0. If a copy of the MPL was not distributed with this
 | 
						|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | 
						|
 | 
						|
from __future__ import absolute_import, print_function, unicode_literals
 | 
						|
 | 
						|
import logging
 | 
						|
from collections import defaultdict
 | 
						|
 | 
						|
import requests
 | 
						|
from redo import retry
 | 
						|
 | 
						|
from taskgraph.optimize import OptimizationStrategy, register_strategy
 | 
						|
from taskgraph.util.attributes import match_run_on_projects
 | 
						|
 | 
						|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
 | 
						|
# URL template for a json-pushes query. Filled in with `head_repository`,
# `push_id_start` and `push_id_end` via str.format().
# NOTE(review): presumably the server treats startID as exclusive and endID as
# inclusive -- confirm against the pushlog documentation.
PUSH_ENDPOINT = "{head_repository}/json-pushes/?startID={push_id_start}&endID={push_id_end}"
 | 
						|
 | 
						|
 | 
						|
@register_strategy('backstop', args=(10, 60, {'all'}))
@register_strategy("push-interval-10", args=(10, 0, {'try'}))
@register_strategy("push-interval-25", args=(25, 0, {'try'}))
class Backstop(OptimizationStrategy):
    """Ensures that no task gets left behind.

    Will schedule all tasks either every Nth push, or M minutes. This behaviour
    is only enabled on autoland. For all other projects, the
    `remove_on_projects` set determines what will happen.

    Args:
        push_interval (int): Number of pushes
        time_interval (int): Minutes between forced schedules.
                             Use 0 to disable.
        remove_on_projects (set): For non-autoland projects, the task will
            be removed if we're running on one of these projects, otherwise
            it will be kept.
    """
    def __init__(self, push_interval, time_interval, remove_on_projects):
        self.push_interval = push_interval
        self.time_interval = time_interval
        self.remove_on_projects = remove_on_projects

        # cached push dates by project: {project: {push_id: push_date}}
        self.push_dates = defaultdict(dict)
        # push_ids whose date could not be retrieved; never requested again
        self.failed_json_push_calls = []

    def should_remove_task(self, task, params, _):
        """Decide whether the task should be optimized away.

        Args:
            task: The task under consideration (not inspected here).
            params (dict): Must provide 'project', 'pushlog_id' and
                'pushdate'; 'head_repository' is also read on autoland when
                the time interval is enabled.
            _: Unused strategy argument.

        Returns:
            bool: True if the task should be removed, False to keep it.
        """
        project = params['project']
        pushid = int(params['pushlog_id'])
        pushdate = int(params['pushdate'])

        # Scheduling on a backstop only makes sense on autoland. For other projects,
        # remove the task if the project matches self.remove_on_projects.
        if project != 'autoland':
            return match_run_on_projects(project, self.remove_on_projects)

        # On every Nth push, want to run all tasks.
        if pushid % self.push_interval == 0:
            return False

        # We also want to ensure we run all tasks at least once per N minutes.
        if self.time_interval > 0:
            elapsed = self.minutes_between_pushes(
                params["head_repository"],
                project,
                pushid,
                pushdate,
            )
            if elapsed >= self.time_interval:
                return False

        return True

    def minutes_between_pushes(self, repository, project, cur_push_id, cur_push_date):
        """Return the minutes elapsed between the current push and the previous one.

        The previous push's date is taken from the per-project cache when
        available, otherwise it is requested from the json-pushes endpoint of
        `repository`. If the date cannot be determined, `self.time_interval`
        is returned so that the caller treats the push as a backstop and keeps
        the task.

        Args:
            repository (str): Base URL of the repository to query.
            project (str): Project name used as the cache key.
            cur_push_id (int): Push id of the current push.
            cur_push_date (int): Date of the current push; the code divides
                differences by 60 to obtain minutes.

        Returns:
            Elapsed minutes, or `self.time_interval` when unknown.
        """
        # defaulting to the full interval so if we can't get a value, the task runs
        min_between_pushes = self.time_interval
        prev_push_id = cur_push_id - 1
        project_dates = self.push_dates[project]

        # cache the pushdate for the current push so we can use it next time
        project_dates[cur_push_id] = cur_push_date

        # check if we already have the previous push id's datetime cached
        prev_push_date = project_dates.get(prev_push_id, 0)

        # we have datetime of current and previous push, so return elapsed minutes and bail
        if cur_push_date > 0 and prev_push_date > 0:
            return (cur_push_date - prev_push_date) / 60

        # datetime for previous pushid not cached, so must retrieve it
        # if we already tried to retrieve the datetime for this pushid
        # before and the request failed, don't try it again
        if prev_push_id in self.failed_json_push_calls:
            return min_between_pushes

        url = PUSH_ENDPOINT.format(
            head_repository=repository,
            push_id_start=prev_push_id - 1,
            push_id_end=prev_push_id,
        )

        try:
            response = retry(requests.get, attempts=2, sleeptime=10,
                             args=(url, ),
                             kwargs={'timeout': 60, 'headers': {'User-Agent': 'TaskCluster'}})
            # requests only raises HTTPError for 4xx/5xx when asked to; without
            # this call the HTTPError handler below could never run.
            response.raise_for_status()
            prev_push_date = response.json().get(str(prev_push_id), {}).get('date', 0)

        # In the event of request times out, requests will raise a TimeoutError.
        except requests.exceptions.Timeout:
            logger.warning("json-pushes timeout, enabling backstop")
            self.failed_json_push_calls.append(prev_push_id)

        # In the event of a network problem (e.g. DNS failure, refused connection, etc),
        # requests will raise a ConnectionError.
        except requests.exceptions.ConnectionError:
            logger.warning("json-pushes connection error, enabling backstop")
            self.failed_json_push_calls.append(prev_push_id)

        # In the event of the rare invalid HTTP response(e.g 404, 401),
        # raise_for_status() raises an HTTPError exception
        except requests.exceptions.HTTPError:
            logger.warning("Bad Http response, enabling backstop")
            self.failed_json_push_calls.append(prev_push_id)

        # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
        except ValueError as error:
            logger.warning("Invalid JSON, possible server error: %s", error)
            self.failed_json_push_calls.append(prev_push_id)

        # Any other requests failure not handled above is logged as a warning
        except requests.exceptions.RequestException as error:
            logger.warning(error)
            self.failed_json_push_calls.append(prev_push_id)

        else:
            if not prev_push_date or prev_push_date <= 0:
                # The response had no usable date for this push. Remember that,
                # otherwise every task evaluated for this push would repeat
                # the same (slow) request.
                logger.warning(
                    "json-pushes returned no date for push %s, enabling backstop",
                    prev_push_id)
                self.failed_json_push_calls.append(prev_push_id)
            else:
                # cache it for next time
                project_dates[prev_push_id] = prev_push_date

                # now have datetime of current and previous push
                if cur_push_date > 0:
                    min_between_pushes = (cur_push_date - prev_push_date) / 60

        return min_between_pushes
 |