Bug 1885839 - Compute backstop interval based on pushlog_id of the last backstop, r=jcristau,taskgraph-reviewers

This computes backstop interval based on the pushlog_id of the last backstop, rather than naively making every 20th pushid a backstop. This solves the problem of when the 20th pushid ends up being a DONTBUILD push. Previously, this would have meant we miss an entire backstop and need to wait another 20 pushes. Now, it means the next non-DONTBUILD push will become the backstop. Differential Revision: https://phabricator.services.mozilla.com/D206243
2024-04-02 16:02:12 +00:00 · 2024-04-02 16:02:12 +00:00 · 06e86851a0
commit 06e86851a0
parent 9c27c09ee4
3 changed files with 50 additions and 26 deletions
--- a/taskcluster/gecko_taskgraph/decision.py
+++ b/taskcluster/gecko_taskgraph/decision.py
@ -452,14 +452,17 @@ def set_try_config(parameters, task_config_file):
 def set_decision_indexes(decision_task_id, params, graph_config):
    index_paths = []
    if params["backstop"]:
-        index_paths.append(BACKSTOP_INDEX)
+        # When two Decision tasks run at nearly the same time, it's possible
        # they both end up being backstops if the second checks the backstop
        # index before the first inserts it. Insert this index first to reduce
        # the chances of that happening.
        index_paths.insert(0, BACKSTOP_INDEX)
    subs = params.copy()
    subs["trust-domain"] = graph_config["trust-domain"]
    index_paths = [i.format(**subs) for i in index_paths]
    for index_path in index_paths:
-        insert_index(index_path, decision_task_id, use_proxy=True)
+        insert_index(index_path.format(**subs), decision_task_id, use_proxy=True)
 def write_artifact(filename, data):
--- a/taskcluster/gecko_taskgraph/test/test_util_backstop.py
+++ b/taskcluster/gecko_taskgraph/test/test_util_backstop.py
@ -18,20 +18,22 @@ from gecko_taskgraph.util.backstop import (
    is_backstop,
 )
-LAST_BACKSTOP_ID = 0
+LAST_BACKSTOP_PUSHID = 1
 LAST_BACKSTOP_PUSHDATE = mktime(datetime.now().timetuple())
 DEFAULT_RESPONSES = {
    "index": {
        "status": 200,
-        "json": {"taskId": LAST_BACKSTOP_ID},
+        "json": {"taskId": LAST_BACKSTOP_PUSHID},
    },
    "artifact": {
        "status": 200,
        "body": dedent(
            """
            pushdate: {}
            pushlog_id: {}
        """.format(
-                LAST_BACKSTOP_PUSHDATE
+                LAST_BACKSTOP_PUSHDATE,
                LAST_BACKSTOP_PUSHID,
            )
        ),
    },
@ -50,7 +52,8 @@ def params():
        "head_rev": "abcdef",
        "project": "autoland",
        "pushdate": LAST_BACKSTOP_PUSHDATE + 1,
-        "pushlog_id": LAST_BACKSTOP_ID + 1,
+        "pushlog_id": LAST_BACKSTOP_PUSHID + 1,
        "target_tasks_method": "default",
    }
@ -78,8 +81,8 @@ def params():
        pytest.param(
            DEFAULT_RESPONSES,
            {
-                "pushlog_id": LAST_BACKSTOP_ID + 1,
+                "pushlog_id": LAST_BACKSTOP_PUSHID + BACKSTOP_PUSH_INTERVAL - 1,
-                "pushdate": LAST_BACKSTOP_PUSHDATE + 1,
+                "pushdate": LAST_BACKSTOP_PUSHDATE + (BACKSTOP_TIME_INTERVAL * 60) - 1,
            },
            False,
            id="not a backstop",
@ -87,10 +90,26 @@ def params():
        pytest.param(
            {},
            {
-                "pushlog_id": BACKSTOP_PUSH_INTERVAL,
+                "target_tasks_method": "nothing",
            },
            False,
            id="dontbuild",
        ),
        pytest.param(
            DEFAULT_RESPONSES,
            {
                "pushlog_id": LAST_BACKSTOP_PUSHID + BACKSTOP_PUSH_INTERVAL,
            },
            True,
-            id="backstop interval",
+            id="interval",
        ),
        pytest.param(
            DEFAULT_RESPONSES,
            {
                "pushlog_id": LAST_BACKSTOP_PUSHID + BACKSTOP_PUSH_INTERVAL + 1,
            },
            True,
            id="greater than interval",
        ),
        pytest.param(
            DEFAULT_RESPONSES,
@ -138,8 +157,8 @@ def test_is_backstop(responses, params, response_args, extra_params, expected):
                **{"trust-domain": "gecko", "project": params["project"]}
            )
        ),
-        "artifact": get_artifact_url(LAST_BACKSTOP_ID, "public/parameters.yml"),
+        "artifact": get_artifact_url(LAST_BACKSTOP_PUSHID, "public/parameters.yml"),
-        "status": get_task_url(LAST_BACKSTOP_ID) + "/status",
+        "status": get_task_url(LAST_BACKSTOP_PUSHID) + "/status",
    }
    for key in ("index", "status", "artifact"):
--- a/taskcluster/gecko_taskgraph/util/backstop.py
+++ b/taskcluster/gecko_taskgraph/util/backstop.py
@ -37,22 +37,17 @@ def is_backstop(
        return True
    project = params["project"]
    pushid = int(params["pushlog_id"])
    pushdate = int(params["pushdate"])
    if project in TRY_PROJECTS:
        return False
    if project not in integration_projects:
        return True
-    # On every Nth push, want to run all tasks.
+    # This push was explicitly set to run nothing (e.g via DONTBUILD), so
-    if pushid % push_interval == 0:
+    # shouldn't be a backstop candidate.
-        return True
+    if params["target_tasks_method"] == "nothing":
    if time_interval <= 0:
        return False
-    # We also want to ensure we run all tasks at least once per N minutes.
+    # Find the last backstop to compute push and time intervals.
    subs = {"trust-domain": trust_domain, "project": project}
    index = BACKSTOP_INDEX.format(**subs)
@ -67,9 +62,7 @@ def is_backstop(
        return True
    try:
-        last_pushdate = get_artifact(last_backstop_id, "public/parameters.yml")[
+        last_params = get_artifact(last_backstop_id, "public/parameters.yml")
            "pushdate"
        ]
    except HTTPError as e:
        # If the last backstop decision task exists in the index, but
        # parameters.yml isn't available yet, it means the decision task is
@ -79,6 +72,15 @@ def is_backstop(
            return False
        raise
-    if (pushdate - last_pushdate) / 60 >= time_interval:
+    # On every Nth push, want to run all tasks.
    if params["pushlog_id"] - last_params["pushlog_id"] >= push_interval:
        return True
    if time_interval <= 0:
        return False
    # We also want to ensure we run all tasks at least once per N minutes.
    if (params["pushdate"] - last_params["pushdate"]) / 60 >= time_interval:
        return True
    return False