forked from mirrors/gecko-dev
		
	 f024d94b2b
			
		
	
	
		f024d94b2b
		
	
	
	
	
		
			
			# ignore-this-changeset Differential Revision: https://phabricator.services.mozilla.com/D186092
		
			
				
	
	
		
			481 lines
		
	
	
	
		
			17 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			481 lines
		
	
	
	
		
			17 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| # This Source Code Form is subject to the terms of the Mozilla Public
 | |
| # License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | |
| 
 | |
| import os
 | |
| import re
 | |
| import subprocess
 | |
| import sys
 | |
| 
 | |
| import hglib
 | |
| import pygit2
 | |
| 
 | |
| DEBUG = False
 | |
| 
 | |
| 
 | |
| def eprint(*args, **kwargs):
 | |
|     print(*args, file=sys.stderr, **kwargs)
 | |
| 
 | |
| 
 | |
| def debugprint(*args, **kwargs):
 | |
|     if DEBUG:
 | |
|         eprint(*args, **kwargs)
 | |
| 
 | |
| 
 | |
| class HgCommit:
 | |
|     def __init__(self, parent1, parent2):
 | |
|         self.parents = []
 | |
|         if parent1 == NULL_PARENT_REV:
 | |
|             raise Exception(
 | |
|                 "Encountered a hg changeset with no parents! We don't handle this...."
 | |
|             )
 | |
|         self.parents.append(parent1)
 | |
|         if parent2 != NULL_PARENT_REV:
 | |
|             self.parents.append(parent2)
 | |
|         self.touches_sync_code = False
 | |
|         self.children = []
 | |
| 
 | |
|     def add_child(self, rev):
 | |
|         self.children.append(rev)
 | |
| 
 | |
| 
 | |
| class GitCommit:
 | |
|     def __init__(self, hg_rev, commit_obj):
 | |
|         self.hg_rev = hg_rev
 | |
|         self.commit_obj = commit_obj
 | |
| 
 | |
| 
 | |
| def load_git_repository():
 | |
|     commit_map = dict()
 | |
|     # First, scan the tags for "mozilla-xxx" that keep track of manually synchronized changes
 | |
|     sync_tags = filter(
 | |
|         lambda ref: ref.startswith("refs/tags/mozilla-"),
 | |
|         list(downstream_git_repo.references),
 | |
|     )
 | |
|     for desc in sync_tags:
 | |
|         commit = downstream_git_repo.lookup_reference(desc).peel()
 | |
|         # cut out the revision hash from the output
 | |
|         hg_rev = desc[18:]
 | |
|         commit_map[hg_rev] = GitCommit(hg_rev, commit)
 | |
|         debugprint("Loaded pre-existing tag hg %s -> git %s" % (hg_rev, commit.oid))
 | |
| 
 | |
|     # Next, scan the commits for a specific message format
 | |
|     re_commitmsg = re.compile(
 | |
|         r"^\[(ghsync|wrupdater)\] From https://hg.mozilla.org/mozilla-central/rev/([0-9a-fA-F]+)$",
 | |
|         re.MULTILINE,
 | |
|     )
 | |
|     for commit in downstream_git_repo.walk(downstream_git_repo.head.target):
 | |
|         m = re_commitmsg.search(commit.message)
 | |
|         if not m:
 | |
|             continue
 | |
|         hg_rev = m.group(2)
 | |
|         commit_map[hg_rev] = GitCommit(hg_rev, commit)
 | |
|         debugprint("Loaded pre-existing commit hg %s -> git %s" % (hg_rev, commit.oid))
 | |
|     return commit_map
 | |
| 
 | |
| 
 | |
| def timeof(git_commit):
 | |
|     return git_commit.commit_obj.commit_time + git_commit.commit_obj.commit_time_offset
 | |
| 
 | |
| 
 | |
| def find_newest_commit(commit_map):
 | |
|     newest_hg_rev = None
 | |
|     newest_commit_time = None
 | |
| 
 | |
|     for hg_rev, git_commit in commit_map.items():
 | |
|         if newest_hg_rev is None or timeof(git_commit) > newest_commit_time:
 | |
|             newest_hg_rev = hg_rev
 | |
|             newest_commit_time = timeof(git_commit)
 | |
| 
 | |
|     return newest_hg_rev
 | |
| 
 | |
| 
 | |
| def get_single_rev(revset):
 | |
|     output = subprocess.check_output(
 | |
|         ["hg", "log", "-r", revset, "--template", "{node}"]
 | |
|     )
 | |
|     output = str(output, "ascii")
 | |
|     return output
 | |
| 
 | |
| 
 | |
| def get_multiple_revs(revset, template):
 | |
|     output = subprocess.check_output(
 | |
|         ["hg", "log", "-r", revset, "--template", template + "\\n"]
 | |
|     )
 | |
|     for line in output.splitlines():
 | |
|         yield str(line, "ascii")
 | |
| 
 | |
| 
 | |
| def get_base_hg_rev(commit_map):
 | |
|     base_hg_rev = find_newest_commit(commit_map)
 | |
|     eprint("Using %s as base hg revision" % base_hg_rev)
 | |
|     return base_hg_rev
 | |
| 
 | |
| 
 | |
| def load_hg_commits(commits, query):
 | |
|     for cset in get_multiple_revs(query, "{node} {p1node} {p2node}"):
 | |
|         tokens = cset.split()
 | |
|         commits[tokens[0]] = HgCommit(tokens[1], tokens[2])
 | |
|     return commits
 | |
| 
 | |
| 
 | |
| def get_real_base_hg_rev(hg_data, commit_map):
 | |
|     # Some of the HG commits we want to port to github may have landed on codelines
 | |
|     # that branched off central prior to base_hg_rev. So when we create the git
 | |
|     # equivalents, they will have parents that are not the HEAD of the git repo,
 | |
|     # but instead will be descendants of older commits in the git repo. In order
 | |
|     # to do this correctly, we need to find the hg-equivalents of all of those
 | |
|     # possible git parents. So first we identify all the "tail" hg revisions in
 | |
|     # our hg_data set (think "tail" as in opposite of "head" which is the tipmost
 | |
|     # commit). The "tail" hg revisions are the ones for which we don't have their
 | |
|     # ancestors in hg_data.
 | |
|     tails = []
 | |
|     for rev, cset in hg_data.items():
 | |
|         for parent in cset.parents:
 | |
|             if parent not in hg_data:
 | |
|                 tails.append(rev)
 | |
|     eprint("Found hg tail revisions %s" % tails)
 | |
|     # Then we find their common ancestor, which will be some ancestor of base_hg_rev
 | |
|     # from which those codelines.
 | |
|     if len(tails) == 0:
 | |
|         common_ancestor = get_single_rev(".")
 | |
|     else:
 | |
|         common_ancestor = get_single_rev("ancestor(" + ",".join(tails) + ")")
 | |
|     eprint("Found common ancestor of tail revisions: %s" % common_ancestor)
 | |
| 
 | |
|     # And then we find the newest git commit whose hg-equivalent is an ancestor of
 | |
|     # that common ancestor, to make sure we are starting from a known hg/git
 | |
|     # commit pair.
 | |
|     for git_commit in sorted(commit_map.values(), key=timeof, reverse=True):
 | |
|         new_base = get_single_rev(
 | |
|             "ancestor(" + common_ancestor + "," + git_commit.hg_rev + ")"
 | |
|         )
 | |
|         if new_base == common_ancestor:
 | |
|             eprint(
 | |
|                 "Pre-existing git commit %s from hg rev %s is descendant of common ancestor; %s"
 | |
|                 % (
 | |
|                     git_commit.commit_obj.id,
 | |
|                     git_commit.hg_rev,
 | |
|                     "walking back further...",
 | |
|                 )
 | |
|             )
 | |
|             continue
 | |
|         if new_base != git_commit.hg_rev:
 | |
|             eprint(
 | |
|                 "Pre-existing git commit %s from hg rev %s is on sibling branch"
 | |
|                 " of common ancestor; %s"
 | |
|                 % (
 | |
|                     git_commit.commit_obj.id,
 | |
|                     git_commit.hg_rev,
 | |
|                     "walking back further...",
 | |
|                 )
 | |
|             )
 | |
|             continue
 | |
|         eprint(
 | |
|             "Pre-existing git commit %s from hg rev %s is sufficiently old; stopping walk"
 | |
|             % (git_commit.commit_obj.id, git_commit.hg_rev)
 | |
|         )
 | |
|         common_ancestor = new_base
 | |
|         break
 | |
| 
 | |
|     return common_ancestor
 | |
| 
 | |
| 
 | |
| # Now we prune out all the uninteresting changesets from hg_commits. The
 | |
| # uninteresting ones are ones that don't touch the target code, are not merges,
 | |
| # and are not referenced by mozilla tags in the git repo.
 | |
| # We do this by rewriting the parents to the "interesting" ancestor.
 | |
| def prune_boring(rev):
 | |
|     while rev in hg_commits:
 | |
|         parent_pruned = False
 | |
|         for i in range(len(hg_commits[rev].parents)):
 | |
|             parent_rev = hg_commits[rev].parents[i]
 | |
|             if parent_rev not in hg_commits:
 | |
|                 continue
 | |
|             if hg_commits[parent_rev].touches_sync_code:
 | |
|                 continue
 | |
|             if len(hg_commits[parent_rev].parents) > 1:
 | |
|                 continue
 | |
|             if parent_rev in hg_to_git_commit_map:
 | |
|                 continue
 | |
| 
 | |
|             # If we get here, then `parent_rev` is a boring revision and we can
 | |
|             # prune it. Connect `rev` to its grandparent, and prune the parent
 | |
|             grandparent_rev = hg_commits[parent_rev].parents[0]
 | |
|             hg_commits[rev].parents[i] = grandparent_rev
 | |
|             # eprint("Pruned %s as boring parent of %s, using %s now" %
 | |
|             #    (parent_rev, rev, grandparent_rev))
 | |
|             parent_pruned = True
 | |
| 
 | |
|         if parent_pruned:
 | |
|             # If we pruned a parent, process `rev` again as we might want to
 | |
|             # prune more parents
 | |
|             continue
 | |
| 
 | |
|         # Collapse identical parents, because if the parents are identical
 | |
|         # we don't need to keep multiple copies of them.
 | |
|         hg_commits[rev].parents = list(dict.fromkeys(hg_commits[rev].parents))
 | |
| 
 | |
|         # If we get here, all of `rev`s parents are interesting, so we can't
 | |
|         # prune them. Move up to the parent rev and start processing that, or
 | |
|         # if we have multiple parents then recurse on those nodes.
 | |
|         if len(hg_commits[rev].parents) == 1:
 | |
|             rev = hg_commits[rev].parents[0]
 | |
|             continue
 | |
| 
 | |
|         for parent_rev in hg_commits[rev].parents:
 | |
|             prune_boring(parent_rev)
 | |
|         return
 | |
| 
 | |
| 
 | |
| class FakeCommit:
 | |
|     def __init__(self, oid):
 | |
|         self.oid = oid
 | |
| 
 | |
| 
 | |
| def fake_commit(hg_rev, parent1, parent2):
 | |
|     if parent1 is None:
 | |
|         eprint("ERROR: Trying to build on None")
 | |
|         exit(1)
 | |
|     oid = "githash_%s" % hash(parent1)
 | |
|     eprint("Fake-built %s" % oid)
 | |
|     return FakeCommit(oid)
 | |
| 
 | |
| 
 | |
| def build_tree(builder, treedata):
 | |
|     for name, value in treedata.items():
 | |
|         if isinstance(value, dict):
 | |
|             subbuilder = downstream_git_repo.TreeBuilder()
 | |
|             build_tree(subbuilder, value)
 | |
|             builder.insert(name, subbuilder.write(), pygit2.GIT_FILEMODE_TREE)
 | |
|         else:
 | |
|             (filemode, contents) = value
 | |
|             blob_oid = downstream_git_repo.create_blob(contents)
 | |
|             builder.insert(name, blob_oid, filemode)
 | |
| 
 | |
| 
 | |
| def author_to_signature(author):
 | |
|     pieces = author.strip().split("<")
 | |
|     if len(pieces) != 2 or pieces[1][-1] != ">":
 | |
|         # We could probably handle this better
 | |
|         return pygit2.Signature(author, "")
 | |
|     name = pieces[0].strip()
 | |
|     email = pieces[1][:-1].strip()
 | |
|     return pygit2.Signature(name, email)
 | |
| 
 | |
| 
 | |
| def real_commit(hg_rev, parent1, parent2):
 | |
|     filetree = dict()
 | |
|     manifest = mozilla_hg_repo.manifest(rev=hg_rev)
 | |
|     for nodeid, permission, executable, symlink, filename in manifest:
 | |
|         if not filename.startswith(relative_path.encode("utf-8")):
 | |
|             continue
 | |
|         if symlink:
 | |
|             filemode = pygit2.GIT_FILEMODE_LINK
 | |
|         elif executable:
 | |
|             filemode = pygit2.GIT_FILEMODE_BLOB_EXECUTABLE
 | |
|         else:
 | |
|             filemode = pygit2.GIT_FILEMODE_BLOB
 | |
|         filecontent = mozilla_hg_repo.cat([filename], rev=hg_rev)
 | |
|         subtree = filetree
 | |
|         for component in filename.split(b"/")[2:-1]:
 | |
|             subtree = subtree.setdefault(component.decode("latin-1"), dict())
 | |
|         filename = filename.split(b"/")[-1]
 | |
|         subtree[filename.decode("latin-1")] = (filemode, filecontent)
 | |
| 
 | |
|     builder = downstream_git_repo.TreeBuilder()
 | |
|     build_tree(builder, filetree)
 | |
|     tree_oid = builder.write()
 | |
| 
 | |
|     parent1_obj = downstream_git_repo.get(parent1)
 | |
|     if parent1_obj.tree_id == tree_oid:
 | |
|         eprint("Early-exit; tree matched that of parent git commit %s" % parent1)
 | |
|         return parent1_obj
 | |
| 
 | |
|     if parent2 is not None:
 | |
|         parent2_obj = downstream_git_repo.get(parent2)
 | |
|         if parent2_obj.tree_id == tree_oid:
 | |
|             eprint("Early-exit; tree matched that of parent git commit %s" % parent2)
 | |
|             return parent2_obj
 | |
| 
 | |
|     hg_rev_obj = mozilla_hg_repo.log(revrange=hg_rev, limit=1)[0]
 | |
|     commit_author = hg_rev_obj[4].decode("latin-1")
 | |
|     commit_message = hg_rev_obj[5].decode("latin-1")
 | |
|     commit_message += (
 | |
|         "\n\n[ghsync] From https://hg.mozilla.org/mozilla-central/rev/%s" % hg_rev
 | |
|         + "\n"
 | |
|     )
 | |
| 
 | |
|     parents = [parent1]
 | |
|     if parent2 is not None:
 | |
|         parents.append(parent2)
 | |
|     commit_oid = downstream_git_repo.create_commit(
 | |
|         None,
 | |
|         author_to_signature(commit_author),
 | |
|         author_to_signature(commit_author),
 | |
|         commit_message,
 | |
|         tree_oid,
 | |
|         parents,
 | |
|     )
 | |
|     eprint("Built git commit %s" % commit_oid)
 | |
|     return downstream_git_repo.get(commit_oid)
 | |
| 
 | |
| 
 | |
| def try_commit(hg_rev, parent1, parent2=None):
 | |
|     if False:
 | |
|         return fake_commit(hg_rev, parent1, parent2)
 | |
|     else:
 | |
|         return real_commit(hg_rev, parent1, parent2)
 | |
| 
 | |
| 
 | |
| def build_git_commits(rev):
 | |
|     debugprint("build_git_commit(%s)..." % rev)
 | |
|     if rev in hg_to_git_commit_map:
 | |
|         debugprint("  maps to %s" % hg_to_git_commit_map[rev].commit_obj.oid)
 | |
|         return hg_to_git_commit_map[rev].commit_obj.oid
 | |
| 
 | |
|     if rev not in hg_commits:
 | |
|         debugprint("  not in hg_commits")
 | |
|         return None
 | |
| 
 | |
|     if len(hg_commits[rev].parents) == 1:
 | |
|         git_parent = build_git_commits(hg_commits[rev].parents[0])
 | |
|         if not hg_commits[rev].touches_sync_code:
 | |
|             eprint(
 | |
|                 "WARNING: Found rev %s that is non-merge and not related to the target"
 | |
|                 % rev
 | |
|             )
 | |
|             return git_parent
 | |
|         eprint("Building git equivalent for %s on top of %s" % (rev, git_parent))
 | |
|         commit_obj = try_commit(rev, git_parent)
 | |
|         hg_to_git_commit_map[rev] = GitCommit(rev, commit_obj)
 | |
|         debugprint("  built %s as %s" % (rev, commit_obj.oid))
 | |
|         return commit_obj.oid
 | |
| 
 | |
|     git_parent_1 = build_git_commits(hg_commits[rev].parents[0])
 | |
|     git_parent_2 = build_git_commits(hg_commits[rev].parents[1])
 | |
|     if git_parent_1 is None or git_parent_2 is None or git_parent_1 == git_parent_2:
 | |
|         git_parent = git_parent_1 if git_parent_2 is None else git_parent_2
 | |
|         if not hg_commits[rev].touches_sync_code:
 | |
|             debugprint(
 | |
|                 "  %s is merge with no parents or doesn't touch WR, returning %s"
 | |
|                 % (rev, git_parent)
 | |
|             )
 | |
|             return git_parent
 | |
| 
 | |
|         eprint(
 | |
|             "WARNING: Found merge rev %s whose parents have identical target code"
 | |
|             ", but modifies the target" % rev
 | |
|         )
 | |
|         eprint("Building git equivalent for %s on top of %s" % (rev, git_parent))
 | |
|         commit_obj = try_commit(rev, git_parent)
 | |
|         hg_to_git_commit_map[rev] = GitCommit(rev, commit_obj)
 | |
|         debugprint("  built %s as %s" % (rev, commit_obj.oid))
 | |
|         return commit_obj.oid
 | |
| 
 | |
|     # An actual merge
 | |
|     eprint(
 | |
|         "Building git equivalent for %s on top of %s, %s"
 | |
|         % (rev, git_parent_1, git_parent_2)
 | |
|     )
 | |
|     commit_obj = try_commit(rev, git_parent_1, git_parent_2)
 | |
|     hg_to_git_commit_map[rev] = GitCommit(rev, commit_obj)
 | |
|     debugprint("  built %s as %s" % (rev, commit_obj.oid))
 | |
|     return commit_obj.oid
 | |
| 
 | |
| 
 | |
| def pretty_print(rev, cset):
 | |
|     desc = "  %s" % rev
 | |
|     desc += " parents: %s" % cset.parents
 | |
|     if rev in hg_to_git_commit_map:
 | |
|         desc += " git: %s" % hg_to_git_commit_map[rev].commit_obj.oid
 | |
|     if rev == hg_tip:
 | |
|         desc += " (tip)"
 | |
|     return desc
 | |
| 
 | |
| 
 | |
| if len(sys.argv) < 3:
 | |
|     eprint("Usage: %s <local-checkout-path> <repo-relative-path>" % sys.argv[0])
 | |
|     eprint("Current dir must be the mozilla hg repo")
 | |
|     exit(1)
 | |
| 
 | |
| local_checkout_path = sys.argv[1]
 | |
| relative_path = sys.argv[2]
 | |
| mozilla_hg_path = os.getcwd()
 | |
| NULL_PARENT_REV = "0000000000000000000000000000000000000000"
 | |
| 
 | |
| downstream_git_repo = pygit2.Repository(pygit2.discover_repository(local_checkout_path))
 | |
| mozilla_hg_repo = hglib.open(mozilla_hg_path)
 | |
| hg_to_git_commit_map = load_git_repository()
 | |
| base_hg_rev = get_base_hg_rev(hg_to_git_commit_map)
 | |
| if base_hg_rev is None:
 | |
|     eprint("Found no sync commits or 'mozilla-xxx' tags")
 | |
|     exit(1)
 | |
| 
 | |
| hg_commits = load_hg_commits(dict(), "only(.," + base_hg_rev + ")")
 | |
| eprint("Initial set has %s changesets" % len(hg_commits))
 | |
| base_hg_rev = get_real_base_hg_rev(hg_commits, hg_to_git_commit_map)
 | |
| eprint("Using hg rev %s as common ancestor of all interesting changesets" % base_hg_rev)
 | |
| 
 | |
| # Refresh hg_commits with our wider dataset
 | |
| hg_tip = get_single_rev(".")
 | |
| wider_range = "%s::%s" % (base_hg_rev, hg_tip)
 | |
| hg_commits = load_hg_commits(hg_commits, wider_range)
 | |
| eprint("Updated set has %s changesets" % len(hg_commits))
 | |
| 
 | |
| if DEBUG:
 | |
|     eprint("Graph of descendants of %s" % base_hg_rev)
 | |
|     output = subprocess.check_output(
 | |
|         [
 | |
|             "hg",
 | |
|             "log",
 | |
|             "--graph",
 | |
|             "-r",
 | |
|             "descendants(" + base_hg_rev + ")",
 | |
|             "--template",
 | |
|             "{node} {desc|firstline}\\n",
 | |
|         ]
 | |
|     )
 | |
|     for line in output.splitlines():
 | |
|         eprint(line.decode("utf-8", "ignore"))
 | |
| 
 | |
| # Also flag any changes that touch the project
 | |
| query = "(" + wider_range + ') & file("glob:' + relative_path + '/**")'
 | |
| for cset in get_multiple_revs(query, "{node}"):
 | |
|     debugprint("Changeset %s modifies %s" % (cset, relative_path))
 | |
|     hg_commits[cset].touches_sync_code = True
 | |
| eprint(
 | |
|     "Identified %s changesets that touch the target code"
 | |
|     % sum([1 if v.touches_sync_code else 0 for (k, v) in hg_commits.items()])
 | |
| )
 | |
| 
 | |
| prune_boring(hg_tip)
 | |
| 
 | |
| # hg_tip itself might be boring
 | |
| if not hg_commits[hg_tip].touches_sync_code and len(hg_commits[hg_tip].parents) == 1:
 | |
|     new_tip = hg_commits[hg_tip].parents[0]
 | |
|     eprint("Pruned tip %s as boring, using %s now" % (hg_tip, new_tip))
 | |
|     hg_tip = new_tip
 | |
| 
 | |
| eprint("--- Interesting changesets ---")
 | |
| for rev, cset in hg_commits.items():
 | |
|     if cset.touches_sync_code or len(cset.parents) > 1 or rev in hg_to_git_commit_map:
 | |
|         eprint(pretty_print(rev, cset))
 | |
| if DEBUG:
 | |
|     eprint("--- Other changesets (not really interesting) ---")
 | |
|     for rev, cset in hg_commits.items():
 | |
|         if not (
 | |
|             cset.touches_sync_code
 | |
|             or len(cset.parents) > 1
 | |
|             or rev in hg_to_git_commit_map
 | |
|         ):
 | |
|             eprint(pretty_print(rev, cset))
 | |
| 
 | |
| git_tip = build_git_commits(hg_tip)
 | |
| if git_tip is None:
 | |
|     eprint("No new changesets generated, exiting.")
 | |
| else:
 | |
|     downstream_git_repo.create_reference("refs/heads/github-sync", git_tip, force=True)
 | |
|     eprint("Updated github-sync branch to %s, done!" % git_tip)
 |