forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			539 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			539 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # This Source Code Form is subject to the terms of the Mozilla Public
 | |
| # License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | |
| 
 | |
| 
 | |
| import logging
 | |
| import os
 | |
| import re
 | |
| import subprocess
 | |
| from abc import ABC, abstractmethod, abstractproperty
 | |
| from shutil import which
 | |
| 
 | |
| import requests
 | |
| from redo import retry
 | |
| 
 | |
| from taskgraph.util.path import ancestors
 | |
| 
 | |
| PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1"
 | |
| 
 | |
| logger = logging.getLogger(__name__)
 | |
| 
 | |
| 
 | |
class Repository(ABC):
    """Abstract interface over a local version-control checkout.

    Concrete subclasses provide the name of the command-line tool through
    ``tool`` and implement the abstract properties and methods below by
    shelling out to that tool via :meth:`run`.
    """

    # Both mercurial and git use sha1 as revision identifiers. Luckily, both define
    # the same value as the null revision.
    #
    # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7
    # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30
    NULL_REVISION = "0000000000000000000000000000000000000000"

    def __init__(self, path):
        """Bind the repository to ``path`` and locate the VCS executable.

        Raises ``OSError`` when the executable named by ``tool`` cannot be
        found on ``PATH``.
        """
        self.path = path
        self.binary = which(self.tool)
        if self.binary is None:
            raise OSError(f"{self.tool} not found!")
        # Lower-cased letters accepted in a ``diff_filter`` argument.
        self._valid_diff_filter = ("m", "a", "d")

        # Private copy so subclasses can add variables without touching the
        # environment of the current process.
        self._env = os.environ.copy()

    def run(self, *args: str, **kwargs):
        """Run the VCS binary with ``args`` inside the checkout.

        Returns the command's standard output decoded as UTF-8.

        ``return_codes`` (keyword only, optional) is a collection of non-zero
        exit codes that are tolerated: when the command exits with one of
        them, an empty string is returned instead of raising. Every other
        keyword argument is forwarded to ``subprocess.check_output``.

        Raises ``subprocess.CalledProcessError`` for any exit code that is not
        listed in ``return_codes``.
        """
        return_codes = kwargs.pop("return_codes", [])
        cmd = (self.binary,) + args

        try:
            return subprocess.check_output(
                cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs
            )
        except subprocess.CalledProcessError as e:
            if e.returncode in return_codes:
                return ""
            raise

    # ``abc.abstractproperty`` has been deprecated since Python 3.3; stacking
    # ``@property`` on top of ``@abstractmethod`` is the supported spelling.

    @property
    @abstractmethod
    def tool(self) -> str:
        """Version control system being used, either 'hg' or 'git'."""

    @property
    @abstractmethod
    def head_rev(self) -> str:
        """Hash of HEAD revision."""

    @property
    @abstractmethod
    def base_rev(self):
        """Hash of revision the current topic branch is based on."""

    @property
    @abstractmethod
    def branch(self):
        """Current branch or bookmark the checkout has active."""

    @property
    @abstractmethod
    def all_remote_names(self):
        """Name of all configured remote repositories."""

    @property
    @abstractmethod
    def default_remote_name(self):
        """Name the VCS defines for the remote repository when cloning
        it for the first time. This name may not exist anymore if users
        changed the default configuration, for instance."""

    @property
    @abstractmethod
    def remote_name(self):
        """Name of the remote repository."""

    def _get_most_suitable_remote(self, remote_instructions):
        """Pick one remote out of ``all_remote_names``.

        Preference order: the only configured remote, then the VCS default
        remote name, then the first remote listed (with a warning).
        ``remote_instructions`` is appended to that warning to tell the user
        how to configure the default remote.
        """
        remotes = self.all_remote_names
        if len(remotes) == 1:
            return remotes[0]

        if self.default_remote_name in remotes:
            return self.default_remote_name

        first_remote = remotes[0]
        logger.warning(
            f"Unable to determine which remote repository to use between: {remotes}. "
            f'Arbitrarily using the first one "{first_remote}". Please set an '
            f"`{self.default_remote_name}` remote if the arbitrarily selected one "
            f"is not right. To do so: {remote_instructions}"
        )

        return first_remote

    @property
    @abstractmethod
    def default_branch(self):
        """Name of the default branch."""

    @abstractmethod
    def get_url(self, remote=None):
        """Get URL of the upstream repository."""

    @abstractmethod
    def get_commit_message(self, revision=None):
        """Commit message of specified revision or current commit."""

    @abstractmethod
    def get_changed_files(
        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
    ):
        """Return a list of files that are changed in:
         * either this repository's working copy,
         * or at a given revision (``rev``)
         * or between 2 revisions (``base_rev`` and ``rev``)

        ``diff_filter`` controls which kinds of modifications are returned.
        It is a string which may only contain the following characters:

            A - Include files that were added
            D - Include files that were deleted
            M - Include files that were modified

        By default, all three will be included.

        ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an
        effect on git. Defaults to 'unstaged'.

        ``rev`` is a specifier for which changesets to consider for
        changes. The exact meaning depends on the vcs system being used.

        ``base_rev`` specifies the range of changesets. This parameter cannot
        be used without ``rev``. The range includes ``rev`` but excludes
        ``base_rev``.
        """

    @abstractmethod
    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
        """Return a list of changed files compared to upstream.

        ``diff_filter`` works the same as `get_changed_files`.
        ``upstream`` is a remote ref to compare against. If unspecified,
        this will be determined automatically.

        NOTE(review): this docstring used to promise a MissingUpstreamRepo
        exception when there is no remote ref; no such exception is defined
        or raised in this module -- confirm the intended contract.
        """

    @abstractmethod
    def working_directory_clean(self, untracked=False, ignored=False):
        """Determine if the working directory is free of modifications.

        Returns True if the working directory does not have any file
        modifications. False otherwise.

        By default, untracked and ignored files are not considered. If
        ``untracked`` or ``ignored`` are set, they influence the clean check
        to factor these file classes into consideration.
        """

    @abstractmethod
    def update(self, ref):
        """Update the working directory to the specified reference."""

    @abstractmethod
    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
        """Find the latest revision that is common to both the given
        ``head_rev`` and ``base_ref_or_rev``"""

    @abstractmethod
    def does_revision_exist_locally(self, revision):
        """Check whether this revision exists in the local repository.

        If this function returns an unexpected value, then make sure
        the revision was fetched from the remote repository."""
 | |
| 
 | |
| 
 | |
class HgRepository(Repository):
    """Mercurial implementation of ``Repository``, driven through ``hg``."""

    tool = "hg"
    default_remote_name = "default"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # HGPLAIN is Mercurial's documented switch for stable, script-friendly
        # output (see `hg help environment`).
        self._env["HGPLAIN"] = "1"

    @property
    def head_rev(self):
        """Full node hash of the working directory parent (``.``)."""
        return self.run("log", "-r", ".", "-T", "{node}").strip()

    @property
    def base_rev(self):
        """Full node hash of the last public ancestor of ``.``."""
        # Stripped for consistency with ``head_rev``.
        return self.run(
            "log", "-r", "last(ancestors(.) and public())", "-T", "{node}"
        ).strip()

    @property
    def branch(self):
        """Content of ``.hg/bookmarks.current``, or None when the file is
        missing or empty."""
        bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current")
        if os.path.exists(bookmarks_fn):
            with open(bookmarks_fn) as f:
                bookmark = f.read()
                return bookmark or None

        return None

    @property
    def all_remote_names(self):
        """Names listed by ``hg paths --quiet``.

        Raises ``RuntimeError`` when no path is configured.
        """
        remotes = self.run("paths", "--quiet").splitlines()
        if not remotes:
            raise RuntimeError("No remotes defined")
        return remotes

    @property
    def remote_name(self):
        """Remote selected by ``_get_most_suitable_remote``."""
        return self._get_most_suitable_remote(
            "Edit .hg/hgrc and add:\n\n[paths]\ndefault = $URL",
        )

    @property
    def default_branch(self):
        # Mercurial recommends keeping "default"
        # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch
        return "default"

    def get_url(self, remote="default"):
        """URL configured for the path named ``remote``."""
        return self.run("path", "-T", "{url}", remote).strip()

    def get_commit_message(self, revision=None):
        """Description of ``revision``, or of the current revision when
        ``revision`` is not given."""
        revision = revision or self.head_rev
        # Query the requested revision; the previous code always asked for "."
        # and silently ignored the argument.
        return self.run("log", "-r", revision, "-T", "{desc}")

    def _format_diff_filter(self, diff_filter, for_status=False):
        """Lower-case ``diff_filter`` and check it only contains valid letters.

        With ``for_status`` set, ``d`` is expanded to ``dr`` (see below).
        """
        df = diff_filter.lower()
        assert all(f in self._valid_diff_filter for f in df)

        # When looking at the changes in the working directory, the hg status
        # command uses 'd' for files that have been deleted with a non-hg
        # command, and 'r' for files that have been `hg rm`ed. Use both.
        return df.replace("d", "dr") if for_status else df

    def _files_template(self, diff_filter):
        """Build an ``hg log`` template printing one file name per line for
        each change kind selected by ``diff_filter``."""
        template = ""
        df = self._format_diff_filter(diff_filter)
        if "a" in df:
            template += "{file_adds % '{file}\\n'}"
        if "d" in df:
            template += "{file_dels % '{file}\\n'}"
        if "m" in df:
            template += "{file_mods % '{file}\\n'}"
        return template

    def get_changed_files(
        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
    ):
        """List changed files in the working copy, at ``rev``, or in the range
        after ``base_rev`` up to ``rev``.

        ``mode`` is accepted for interface compatibility and is not used here.
        Raises ``ValueError`` when ``base_rev`` is given without ``rev``.
        """
        if rev is None:
            if base_rev is not None:
                raise ValueError("Cannot specify `base_rev` without `rev`")
            # Use --no-status to print just the filename.
            df = self._format_diff_filter(diff_filter, for_status=True)
            return self.run("status", "--no-status", f"-{df}").splitlines()
        else:
            template = self._files_template(diff_filter)
            revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}"
            return self.run("log", "-r", revision_argument, "-T", template).splitlines()

    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
        """List files changed in changesets not yet present upstream.

        Without ``upstream``, draft ancestors of ``.`` are inspected locally;
        otherwise ``hg outgoing`` is run against ``upstream``.
        """
        template = self._files_template(diff_filter)

        if not upstream:
            output = self.run(
                "log", "-r", "draft() and ancestors(.)", "--template", template
            )
        else:
            output = self.run(
                "outgoing",
                "-r",
                ".",
                "--quiet",
                "--template",
                template,
                upstream,
                # Exit code 1 is tolerated and yields an empty list.
                return_codes=(1,),
            )

        # The template emits one path per line, so split on line boundaries:
        # a bare ``split()`` would tear apart paths containing whitespace.
        return [name for name in output.splitlines() if name]

    def working_directory_clean(self, untracked=False, ignored=False):
        """Return True when ``hg status`` reports nothing for the requested
        file classes."""
        args = ["status", "--modified", "--added", "--removed", "--deleted"]
        if untracked:
            args.append("--unknown")
        if ignored:
            args.append("--ignored")

        # If output is empty, there are no entries of requested status, which
        # means we are clean.
        return not len(self.run(*args).strip())

    def update(self, ref):
        """Run ``hg update --check ref`` and return its output."""
        return self.run("update", "--check", ref)

    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
        """Node hash of the last revision that is an ancestor of both
        arguments."""
        return self.run(
            "log",
            "-r",
            f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))",
            "--template",
            "{node}",
        ).strip()

    def does_revision_exist_locally(self, revision):
        """Return True when ``hg log -r revision`` prints something, False
        when hg aborts with exit code 255."""
        try:
            return self.run("log", "-r", revision).strip() != ""
        except subprocess.CalledProcessError as e:
            # Error code 255 comes with the message:
            # "abort: unknown revision $REVISION"
            if e.returncode == 255:
                return False
            raise
 | |
| 
 | |
| 
 | |
class GitRepository(Repository):
    """Git implementation of ``Repository``, driven through ``git``."""

    tool = "git"
    default_remote_name = "origin"

    # Matches the symref line printed by `git ls-remote --symref <remote> HEAD`.
    _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD")

    @property
    def head_rev(self):
        """Hash that ``HEAD`` resolves to."""
        return self.run("rev-parse", "--verify", "HEAD").strip()

    @property
    def base_rev(self):
        """Last boundary commit between ``HEAD`` and the remote refs, or
        ``head_rev`` when ``git rev-list`` prints nothing."""
        refs = self.run(
            "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes"
        ).splitlines()
        if refs:
            return refs[-1][1:]  # boundary starts with a prefix `-`
        return self.head_rev

    @property
    def branch(self):
        """Name printed by ``git branch --show-current``, or None when that
        output is empty."""
        return self.run("branch", "--show-current").strip() or None

    @property
    def all_remote_names(self):
        """Names listed by ``git remote``.

        Raises ``RuntimeError`` when no remote is configured.
        """
        remotes = self.run("remote").splitlines()
        if not remotes:
            raise RuntimeError("No remotes defined")
        return remotes

    @property
    def remote_name(self):
        """Remote of the current branch's upstream (``@{u}``); when git exits
        with code 128, fall back to ``_get_most_suitable_remote``."""
        try:
            remote_branch_name = self.run(
                "rev-parse", "--verify", "--abbrev-ref", "--symbolic-full-name", "@{u}"
            ).strip()
            return remote_branch_name.split("/")[0]
        except subprocess.CalledProcessError as e:
            # Error code 128 comes with the message:
            # "fatal: no upstream configured for branch $BRANCH"
            if e.returncode != 128:
                raise

        return self._get_most_suitable_remote("`git remote add origin $URL`")

    @property
    def default_branch(self):
        """Default branch, found by trying three strategies in order."""
        try:
            # this one works if the current repo was cloned from an existing
            # repo elsewhere
            return self._get_default_branch_from_cloned_metadata()
        except (subprocess.CalledProcessError, RuntimeError):
            pass

        try:
            # This call works if you have (network) access to the repo
            return self._get_default_branch_from_remote_query()
        except (subprocess.CalledProcessError, RuntimeError):
            pass

        # this one is the last resort in case the remote is not accessible and
        # the local repo is where `git init` was made
        return self._guess_default_branch()

    def _get_default_branch_from_remote_query(self):
        """Ask the remote which branch its ``HEAD`` points to.

        Returns ``"<remote>/<branch>"``. Raises ``RuntimeError`` when the
        ``ls-remote`` output has no matching symref line.
        """
        # This function requires network access to the repo
        remote_name = self.remote_name
        output = self.run("ls-remote", "--symref", remote_name, "HEAD")
        matches = self._LS_REMOTE_PATTERN.search(output)
        if not matches:
            # Both fragments must be f-strings; the second one used to be a
            # plain string and printed a literal "{output}".
            raise RuntimeError(
                f'Could not find the default branch of remote repository "{remote_name}". '
                f"Got: {output}"
            )

        branch_name = matches.group("branch_name")
        return f"{remote_name}/{branch_name}"

    def _get_default_branch_from_cloned_metadata(self):
        """Resolve ``<remote>/HEAD`` from the local refs."""
        return self.run("rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD").strip()

    def _guess_default_branch(self):
        """Guess the default branch from well-known branch names.

        Returns the single ref whose name ends with one of the candidates.
        Raises ``RuntimeError`` when zero or several refs match.
        """
        candidates = ("main", "master", "branches/default/tip")
        refs = (
            line.strip()
            for line in self.run(
                "branch", "--all", "--no-color", "--format=%(refname)"
            ).splitlines()
        )
        # ``str.endswith`` accepts a tuple, which replaces the nested loop
        # over the candidate names.
        branches = [ref for ref in refs if ref.endswith(candidates)]

        if len(branches) == 1:
            return branches[0]

        raise RuntimeError(f"Unable to find default branch. Got: {branches}")

    def get_url(self, remote="origin"):
        """URL configured for ``remote``."""
        return self.run("remote", "get-url", remote).strip()

    def get_commit_message(self, revision=None):
        """Raw body (``%B``) of ``revision``, or of ``HEAD`` when ``revision``
        is not given."""
        revision = revision or self.head_rev
        # Pass the revision to git; the previous code computed it and then
        # always logged HEAD.
        return self.run("log", "-n1", "--format=%B", revision)

    def get_changed_files(
        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
    ):
        """List changed files in the working copy, at ``rev``, or in the range
        ``base_rev..rev``.

        Without ``rev``, ``mode`` selects unstaged changes (``git diff``),
        staged changes (``--cached``) or both (``git diff HEAD``).
        Raises ``ValueError`` when ``base_rev`` is given without ``rev``.
        """
        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)

        if rev is None:
            if base_rev is not None:
                raise ValueError("Cannot specify `base_rev` without `rev`")
            cmd = ["diff"]
            if mode == "staged":
                cmd.append("--cached")
            elif mode == "all":
                cmd.append("HEAD")
        else:
            revision_argument = (
                f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}"
            )
            # An empty format leaves only the file names in the log output.
            cmd = ["log", "--format=format:", revision_argument]

        cmd.append("--name-only")
        cmd.append("--diff-filter=" + diff_filter.upper())

        files = self.run(*cmd).splitlines()
        # Drop the blank lines left between commits.
        return [f for f in files if f]

    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
        """List files changed in commits reachable from ``HEAD`` but not from
        ``upstream`` (or from any remote ref when ``upstream`` is not given)."""
        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)

        not_condition = upstream if upstream else "--remotes"

        files = self.run(
            "log",
            "--name-only",
            f"--diff-filter={diff_filter.upper()}",
            "--oneline",
            "--pretty=format:",
            "HEAD",
            "--not",
            not_condition,
        ).splitlines()
        return [f for f in files if f]

    def working_directory_clean(self, untracked=False, ignored=False):
        """Return True when ``git status --porcelain`` reports nothing for the
        requested file classes."""
        args = ["status", "--porcelain"]

        # Even in --porcelain mode, behavior is affected by the
        # ``status.showUntrackedFiles`` option, which means we need to be
        # explicit about how to treat untracked files.
        if untracked:
            args.append("--untracked-files=all")
        else:
            args.append("--untracked-files=no")

        if ignored:
            args.append("--ignored")

        # If output is empty, there are no entries of requested status, which
        # means we are clean.
        return not len(self.run(*args).strip())

    def update(self, ref):
        """Check out ``ref``."""
        self.run("checkout", ref)

    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
        """Hash printed by ``git merge-base`` for the two arguments."""
        return self.run("merge-base", base_ref_or_rev, head_rev).strip()

    def does_revision_exist_locally(self, revision):
        """Return True when ``revision`` names a commit object, False when git
        exits with code 128."""
        try:
            return self.run("cat-file", "-t", revision).strip() == "commit"
        except subprocess.CalledProcessError as e:
            # Error code 128 comes with the message:
            # "git cat-file: could not get object info"
            if e.returncode == 128:
                return False
            raise
 | |
| 
 | |
| 
 | |
def get_repository(path):
    """Return a repository object for the checkout containing ``path``.

    Each directory yielded by ``ancestors(path)`` is probed in turn: a ``.hg``
    directory selects ``HgRepository``, otherwise an existing ``.git`` entry
    (file or directory) selects ``GitRepository``.

    Raises ``RuntimeError`` when no probed directory carries either marker.
    """
    for candidate in ancestors(path):
        hg_marker = os.path.join(candidate, ".hg")
        if os.path.isdir(hg_marker):
            return HgRepository(candidate)

        git_marker = os.path.join(candidate, ".git")
        if os.path.exists(git_marker):
            return GitRepository(candidate)

    raise RuntimeError("Current directory is neither a git or hg repository")
 | |
| 
 | |
| 
 | |
def find_hg_revision_push_info(repository, revision):
    """Look up the push that introduced ``revision`` in ``repository``.

    ``repository`` is the base URL formatted into ``PUSHLOG_TMPL`` and
    ``revision`` the changeset to search for. The pushlog is fetched with up
    to 5 attempts, 10 seconds apart.

    Returns a dict with the keys ``pushdate``, ``pushid`` and ``user``.

    Raises ``RuntimeError`` unless the pushlog reports exactly one push; HTTP
    errors from the final attempt propagate from ``requests``.
    """
    pushlog_url = PUSHLOG_TMPL.format(repository, revision)

    def query_pushlog(url):
        # Use the argument supplied by ``retry`` rather than reaching for the
        # enclosing variable, so the helper really queries what it is given.
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        return r

    r = retry(
        query_pushlog,
        args=(pushlog_url,),
        attempts=5,
        sleeptime=10,
    )
    pushes = r.json()["pushes"]
    if len(pushes) != 1:
        raise RuntimeError(
            f"Unable to find a single pushlog_id for {repository} "
            f"revision {revision}: {pushes}"
        )
    # Exactly one entry is left, so its key is the push id.
    pushid = next(iter(pushes))
    push = pushes[pushid]
    return {
        "pushdate": push["date"],
        "pushid": pushid,
        "user": push["user"],
    }
 | 
