Bug 1636797 - In hash.py, enumerate files from the VCS rather than searching the filesystem directly r=ahal

This resolves a long-standing issue in development where `mach artifact` (and therefore `mach bootstrap`) would fail unpredictably if you had dirty, but ignored, files in your checkout. Resolving this problem often required unwieldy `hg purge`/`git clean` incantations that are easy to get wrong.

This patch addresses the problem by doing what we "should" have been doing all along, and consulting the VCS to list tracked files rather than listing EVERY file on disk and applying heuristics to determine whether they should be included in the hash.

Differential Revision: https://phabricator.services.mozilla.com/D86780
This commit is contained in:
Ricky Stewart 2020-08-17 15:19:34 +00:00
parent a97cc4af06
commit 9719a9a5a9
4 changed files with 66 additions and 15 deletions

View file

@ -54,11 +54,10 @@ def check_files():
with get_repository_from_env() as repo: with get_repository_from_env() as repo:
root = repo.path root = repo.path
for filename in repo.get_files_in_working_directory(): for filename, _ in repo.get_tracked_files_finder().find('**/*.msg'):
if filename.endswith('.msg'): if filename not in ignore_files:
if filename not in ignore_files: if not check_single_file(os.path.join(root, filename)):
if not check_single_file(os.path.join(root, filename)): result = False
result = False
return result return result

View file

@ -4,6 +4,7 @@
from __future__ import absolute_import, print_function, unicode_literals from __future__ import absolute_import, print_function, unicode_literals
import bisect
import codecs import codecs
import errno import errno
import inspect import inspect
@ -15,11 +16,12 @@ import stat
import subprocess import subprocess
import uuid import uuid
import mozbuild.makeutil as makeutil import mozbuild.makeutil as makeutil
from itertools import chain from itertools import chain, takewhile
from mozbuild.preprocessor import Preprocessor from mozbuild.preprocessor import Preprocessor
from mozbuild.util import ( from mozbuild.util import (
FileAvoidWrite, FileAvoidWrite,
ensure_unicode, ensure_unicode,
memoize
) )
from mozpack.executables import ( from mozpack.executables import (
is_executable, is_executable,
@ -1238,3 +1240,39 @@ class MercurialRevisionFinder(BaseFinder):
self._files[path] = f self._files[path] = f
return f return f
class FileListFinder(BaseFinder):
    """Finder backed by an explicit, pre-sorted list of file names."""

    def __init__(self, files):
        """Keep a reference to ``files``, which must already be a sorted list."""
        self._files = files

    @memoize
    def _match(self, pattern):
        """Return, in list order, every known file that matches ``pattern``."""
        # The generic _find_helper utility is deliberately not used here: it
        # is not tuned for performance the way this class wants to be. That
        # remains a possible avenue for refactoring.

        # Optimization: the leading path components that contain no wildcard
        # form a literal prefix. Because the file list is sorted, all
        # candidates sit in one contiguous run starting at the bisection
        # point, and the scan can stop at the first entry without the prefix.
        literal_parts = takewhile(
            lambda part: '*' not in part, pattern.split('/'))
        prefix = '/'.join(literal_parts)
        prefix_len = len(prefix)
        first = bisect.bisect_left(self._files, prefix)

        matches = []
        for index in six.moves.range(first, len(self._files)):
            candidate = self._files[index]
            if not candidate.startswith(prefix):
                break
            # Hidden files (a '/.' after the literal prefix) are skipped.
            hidden = '/.' in candidate[prefix_len:]
            if not hidden and mozpath.match(candidate, pattern):
                matches.append(candidate)
        return matches

    def find(self, pattern):
        """Yield ``(path, File(path))`` for each file matching ``pattern``.

        Leading and trailing slashes are stripped from ``pattern`` before
        matching.
        """
        for path in self._match(pattern.strip('/')):
            yield path, File(path)

View file

@ -14,6 +14,7 @@ import sys
from mozbuild.util import ensure_subprocess_env from mozbuild.util import ensure_subprocess_env
from mozfile import which from mozfile import which
from mozpack.files import FileListFinder
class MissingVCSTool(Exception): class MissingVCSTool(Exception):
@ -216,8 +217,14 @@ class Repository(object):
''' '''
@abc.abstractmethod @abc.abstractmethod
def get_files_in_working_directory(self): def get_tracked_files_finder(self):
"""Obtain a list of managed files in the working directory.""" """Obtain a mozpack.files.BaseFinder of managed files in the working
directory.
The Finder will have its list of all files in the repo cached for its
entire lifetime, so operations on the Finder will not track with, for
example, commits to the repo during the Finder's lifetime.
"""
@abc.abstractmethod @abc.abstractmethod
def working_directory_clean(self, untracked=False, ignored=False): def working_directory_clean(self, untracked=False, ignored=False):
@ -419,10 +426,11 @@ class HgRepository(Repository):
return return
self._run('forget', *paths) self._run('forget', *paths)
def get_files_in_working_directory(self): def get_tracked_files_finder(self):
# Can return backslashes on Windows. Normalize to forward slashes. # Can return backslashes on Windows. Normalize to forward slashes.
return list(p.replace('\\', '/') for p in files = list(p.replace('\\', '/') for p in
self._run(b'files', b'-0').split('\0') if p) self._run(b'files', b'-0').split('\0') if p)
return FileListFinder(files)
def working_directory_clean(self, untracked=False, ignored=False): def working_directory_clean(self, untracked=False, ignored=False):
args = ['status', '--modified', '--added', '--removed', args = ['status', '--modified', '--added', '--removed',
@ -549,8 +557,9 @@ class GitRepository(Repository):
return return
self._run('reset', *paths) self._run('reset', *paths)
def get_files_in_working_directory(self): def get_tracked_files_finder(self):
return [p for p in self._run('ls-files', '-z').split('\0') if p] files = [p for p in self._run('ls-files', '-z').split('\0') if p]
return FileListFinder(files)
def working_directory_clean(self, untracked=False, ignored=False): def working_directory_clean(self, untracked=False, ignored=False):
args = ['status', '--porcelain'] args = ['status', '--porcelain']

View file

@ -4,8 +4,8 @@
from __future__ import absolute_import, print_function, unicode_literals from __future__ import absolute_import, print_function, unicode_literals
from mozbuild.util import memoize from mozbuild.util import memoize
from mozpack.files import FileFinder
import mozpack.path as mozpath import mozpack.path as mozpath
from mozversioncontrol import get_repository_object
import hashlib import hashlib
import io import io
import six import six
@ -21,6 +21,11 @@ def hash_path(path):
return hashlib.sha256(fh.read()).hexdigest() return hashlib.sha256(fh.read()).hexdigest()
@memoize
def get_file_finder(base_path):
    """Return the tracked-files finder for the repository at ``base_path``.

    The function is wrapped in ``memoize``, so the finder is presumably
    built once per distinct ``base_path`` and reused on later calls.
    """
    repo = get_repository_object(base_path)
    return repo.get_tracked_files_finder()
def hash_paths(base_path, patterns): def hash_paths(base_path, patterns):
""" """
Give a list of path patterns, return a digest of the contents of all Give a list of path patterns, return a digest of the contents of all
@ -30,7 +35,7 @@ def hash_paths(base_path, patterns):
Each file is hashed. The list of all hashes and file paths is then Each file is hashed. The list of all hashes and file paths is then
itself hashed to produce the result. itself hashed to produce the result.
""" """
finder = FileFinder(base_path) finder = get_file_finder(base_path)
h = hashlib.sha256() h = hashlib.sha256()
files = {} files = {}
for pattern in patterns: for pattern in patterns: