#!/usr/bin/python3 -u
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Run a task after performing common actions.

This script is meant to be the "driver" for TaskCluster-based tasks.
It receives some common arguments to control the run-time environment.

It performs actions as requested by the arguments. Then it executes
the requested process and prints its output, prefixing it with the
current time to improve log usefulness.
"""
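
# Illustrative invocation (argument values are hypothetical, not taken from a
# real task definition):
#
#     ./run-task --user worker --group worker \
#         --gecko-checkout /builds/worker/checkouts/gecko \
#         -- /builds/worker/checkouts/gecko/mach build
#
# Everything before `--` is parsed by this script; everything after it is the
# task command executed via run_and_prefix_output() (see main() below).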

import sys


if sys.version_info[0:2] < (3, 5):
    print('run-task requires Python 3.5+')
    sys.exit(1)


import argparse
import datetime
import errno
import io
import json
import os
import platform
import random
import re
import shutil
import signal
import socket
import stat
import subprocess

import urllib.error
import urllib.request

from threading import Thread

FINGERPRINT_URL = 'http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/hgfingerprint'
FALLBACK_FINGERPRINT = {
    'fingerprints':
        "sha256:4D:EB:21:6E:35:2F:99:C6:8F:C3:47:9B:57:B8:6C:17:15:8F:86:09:D4:6C:17:1D:87:B0:DE:F9:0E:51:70:FC,"
        "sha256:FF:E7:8D:93:E9:56:3C:C0:19:FC:00:4C:18:B9:86:E5:08:E5:10:F5:E2:EA:48:E8:22:D3:A3:3A:CA:99:C3:4C"
}

HGMOINTERNAL_CONFIG_URL = 'http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/hgmointernal'

CACHE_UID_GID_MISMATCH = '''
There is a UID/GID mismatch on the cache. This likely means:

a) different tasks are running as a different user/group
b) different Docker images have different UID/GID for the same user/group

Our cache policy is that the UID/GID for ALL tasks must be consistent
for the lifetime of the cache. This eliminates permissions problems due
to file/directory user/group ownership.

To make this error go away, ensure that all Docker images use
a consistent UID/GID and that all tasks using this cache are running as
the same user/group.
'''


NON_EMPTY_VOLUME = '''
error: volume %s is not empty

Our Docker image policy requires volumes to be empty.

The volume was likely populated as part of building the Docker image.
Change the Dockerfile and anything run from it to not create files in
any VOLUME.

A lesser possibility is that you stumbled upon a TaskCluster platform bug
where it fails to use new volumes for tasks.
'''


FETCH_CONTENT_NOT_FOUND = '''
error: fetch-content script not found

The script at `taskcluster/scripts/misc/fetch-content` could not be
detected in the current environment.

If this task clones gecko, make sure the GECKO_PATH environment variable
is set to the proper location. Otherwise, the script may need to be mounted
or added to the task's docker image and then added to the PATH.
'''

# The exit code to use when caches should be purged and the task retried.
# This is EX_OSFILE (from sysexits.h):
#     Some system file does not exist, cannot be opened, or has some
#     sort of error (e.g., syntax error).
EXIT_PURGE_CACHE = 72


IS_MACOSX = sys.platform == 'darwin'
IS_POSIX = os.name == 'posix'
IS_WINDOWS = os.name == 'nt'


def print_line(prefix, m):
    now = datetime.datetime.utcnow().isoformat().encode('utf-8')
    # slice microseconds to 3 decimals.
    now = now[:-3] if now[-7:-6] == b'.' else now
    bytes = b'[%s %sZ] %s' % (prefix, now, m)
    written = 0
    while written < len(bytes):
        written += (sys.stdout.buffer.write(bytes[written:]) or 0)
    sys.stdout.buffer.flush()
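
# An output line produced by print_line() looks roughly like this (the
# timestamp below is illustrative):
#
#     [task 2020-06-25T10:14:05.123Z] hello
#
# i.e. the prefix and a UTC ISO-8601 timestamp truncated to milliseconds,
# followed by the message.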


def run_and_prefix_output(prefix, args, *, extra_env=None, cwd=None):
    """Runs a process and prefixes its output with the time.

    Returns the process exit code.
    """
    print_line(
        prefix,
        b"executing %r%s\n" % (args, b" in %s" % (cwd.encode("utf-8"),) if cwd else b""),
    )

    env = dict(os.environ)
    env.update(extra_env or {})

    # Note: TaskCluster's stdin is a TTY. This attribute is lost
    # when we pass sys.stdin to the invoked process. If we cared
    # to preserve stdin as a TTY, we could make this work. But until
    # someone needs it, don't bother.

    # We want stdout to be bytes on Python 3. That means we can't use
    # universal_newlines=True (because it implies text mode). But
    # p.stdout.readline() won't work for bytes text streams. So, on Python 3,
    # we manually install a latin1 stream wrapper. This allows us to readline()
    # and preserves bytes, without losing any data.

    p = subprocess.Popen(args,
                         # Disable buffering because we want to receive output
                         # as it is generated so timestamps in logs are
                         # accurate.
                         bufsize=0,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         stdin=sys.stdin.fileno(),
                         env=env,
                         cwd=cwd)

    stdout = io.TextIOWrapper(p.stdout, encoding='latin1')

    while True:
        data = stdout.readline().encode('latin1')

        if data == b'':
            break

        print_line(prefix, data)

    return p.wait()


def get_posix_user_group(user, group):
    import grp
    import pwd

    try:
        user_record = pwd.getpwnam(user)
    except KeyError:
        print('could not find user %s; specify a valid user with --user' % user)
        sys.exit(1)

    try:
        group_record = grp.getgrnam(group)
    except KeyError:
        print('could not find group %s; specify a valid group with --group' %
              group)
        sys.exit(1)

    # Most tasks use worker:worker. We require they have a specific numeric ID
    # because otherwise it is too easy for files written to caches to have
    # mismatched numeric IDs, which results in permissions errors.
    if user_record.pw_name == 'worker' and user_record.pw_uid != 1000:
        print('user `worker` must have uid=1000; got %d' % user_record.pw_uid)
        sys.exit(1)

    if group_record.gr_name == 'worker' and group_record.gr_gid != 1000:
        print('group `worker` must have gid=1000; got %d' % group_record.gr_gid)
        sys.exit(1)

    # Find all groups to which this user is a member.
    gids = [g.gr_gid for g in grp.getgrall() if group in g.gr_mem]

    return user_record, group_record, gids


def write_audit_entry(path, msg):
    now = datetime.datetime.utcnow().isoformat().encode('utf-8')
    with open(path, 'ab') as fh:
        fh.write(b'[%sZ %s] %s\n' % (
                 now, os.environb.get(b'TASK_ID', b'UNKNOWN'), msg))
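
# A .cachelog audit entry written by write_audit_entry() looks roughly like
# this (timestamp and TASK_ID slug are illustrative):
#
#     [2020-06-25T10:14:05.123456Z bcE5uCkmRWSLvfObqm4xRw] created; requirements: gid=1000, uid=1000, version=1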


WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR


def set_dir_permissions(path, uid, gid):
    st = os.lstat(path)

    if st.st_uid != uid or st.st_gid != gid:
        os.chown(path, uid, gid)

    # Also make sure dirs are writable in case we need to delete
    # them.
    if st.st_mode & WANTED_DIR_MODE != WANTED_DIR_MODE:
        os.chmod(path, st.st_mode | WANTED_DIR_MODE)


def chown_recursive(path, user, group, uid, gid):
    print_line(b'chown',
               b'recursively changing ownership of %s to %s:%s\n' %
               (path.encode('utf-8'), user.encode('utf-8'), group.encode(
                   'utf-8')))

    set_dir_permissions(path, uid, gid)

    for root, dirs, files in os.walk(path):
        for d in dirs:
            set_dir_permissions(os.path.join(root, d), uid, gid)

        for f in files:
            # The file may be a symlink that points to nowhere, in which case
            # os.chown() would fail because it attempts to follow the
            # symlink. We only care about directory entries, not what
            # they point to. So setting the owner of the symlink should
            # be sufficient.
            os.lchown(os.path.join(root, f), uid, gid)


def configure_cache_posix(cache, user, group,
                          untrusted_caches, running_as_root):
    """Configure a cache path on POSIX platforms.

    For each cache, we write out a special file denoting attributes and
    capabilities of run-task and the task being executed. These attributes
    are used by subsequent run-task invocations to validate that use of
    the cache is acceptable.

    We /could/ blow away the cache data on requirements mismatch.
    While this would be convenient, it could result in "competing" tasks
    effectively undoing each other's work. This would slow down task
    execution in aggregate. Without monitoring for this, people may not notice
    the problem and tasks would be slower than they could be. We follow the
    principle of "fail fast" to ensure optimal task execution.

    We also write an audit log of who used the caches. This log is printed
    during failures to help aid debugging.
    """

    our_requirements = {
        # Include a version string that we can bump whenever to trigger
        # fresh caches. The actual value is not relevant and doesn't need
        # to follow any explicit order. Since taskgraph bakes this file's
        # hash into cache names, any change to this file/version is sufficient
        # to force the use of a new cache.
        b'version=1',
        # Include the UID and GID the task will run as to ensure that tasks
        # with different UID and GID don't share the same cache.
        b'uid=%d' % user.pw_uid,
        b'gid=%d' % group.gr_gid,
    }

    requires_path = os.path.join(cache, '.cacherequires')
    audit_path = os.path.join(cache, '.cachelog')

    # The cache is empty. Configure it.
    if not os.listdir(cache):
        print_line(b'cache', b'cache %s is empty; writing requirements: '
                             b'%s\n' % (
                                 cache.encode('utf-8'), b' '.join(sorted(our_requirements))))

        # We write a requirements file so future invocations know what the
        # requirements are.
        with open(requires_path, 'wb') as fh:
            fh.write(b'\n'.join(sorted(our_requirements)))

        # And make it read-only as a precaution against deletion.
        os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)

        write_audit_entry(audit_path,
                          b'created; requirements: %s' %
                          b', '.join(sorted(our_requirements)))

        set_dir_permissions(cache, user.pw_uid, group.gr_gid)
        return

    # The cache has content and we have a requirements file. Validate
    # requirements alignment.
    if os.path.exists(requires_path):
        with open(requires_path, 'rb') as fh:
            wanted_requirements = set(fh.read().splitlines())

        print_line(b'cache', b'cache %s exists; requirements: %s\n' % (
            cache.encode('utf-8'), b' '.join(sorted(wanted_requirements))))

        missing = wanted_requirements - our_requirements

        # Allow a requirements mismatch for uid/gid if and only if caches
        # are untrusted. This allows cache behavior on Try to be
        # reasonable. Otherwise, random tasks could "poison" cache
        # usability by introducing uid/gid mismatches. For untrusted
        # environments like Try, this is a perfectly reasonable thing to
        # allow.
        if missing and untrusted_caches and running_as_root and \
                all(s.startswith((b'uid=', b'gid=')) for s in missing):
            print_line(b'cache',
                       b'cache %s uid/gid mismatch; this is acceptable '
                       b'because caches for this task are untrusted; '
                       b'changing ownership to facilitate cache use\n' %
                       cache.encode('utf-8'))
            chown_recursive(cache, user.pw_name, group.gr_name, user.pw_uid,
                            group.gr_gid)

            # And write out the updated reality.
            with open(requires_path, 'wb') as fh:
                fh.write(b'\n'.join(sorted(our_requirements)))

            write_audit_entry(audit_path,
                              b'chown; requirements: %s' %
                              b', '.join(sorted(our_requirements)))

        elif missing:
            print('error: requirements for populated cache %s differ from '
                  'this task' % cache)
            print('cache requirements: %s' % ' '.join(sorted(
                s.decode('utf-8') for s in wanted_requirements)))
            print('our requirements:   %s' % ' '.join(sorted(
                s.decode('utf-8') for s in our_requirements)))
            if any(s.startswith((b'uid=', b'gid=')) for s in missing):
                print(CACHE_UID_GID_MISMATCH)

            write_audit_entry(audit_path,
                              b'requirements mismatch; wanted: %s' %
                              b', '.join(sorted(our_requirements)))

            print('')
            print('audit log:')
            with open(audit_path, 'r') as fh:
                print(fh.read())

            return True
        else:
            write_audit_entry(audit_path, b'used')

        # We don't need to adjust permissions here because the cache is
        # associated with a uid/gid and the first task should have set
        # a proper owner/group.

        return

    # The cache has content and no requirements file. This shouldn't
    # happen because run-task should be the first thing that touches a
    # cache.
    print('error: cache %s is not empty and is missing a '
          '.cacherequires file; the cache names for this task are '
          'likely mis-configured or TASKCLUSTER_CACHES is not set '
          'properly' % cache)

    write_audit_entry(audit_path, b'missing .cacherequires')
    return True
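
# After the first task populates a cache, its root would contain something
# like this (values illustrative, derived from our_requirements above):
#
#     .cacherequires  ->  b'gid=1000\nuid=1000\nversion=1'
#     .cachelog       ->  one audit line per run-task invocation using the cache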


def configure_volume_posix(volume, user, group, running_as_root):
    # The only time we should see files in the volume is if the Docker
    # image build put files there.
    #
    # For the sake of simplicity, our policy is that volumes should be
    # empty. This also has the advantage that an empty volume looks
    # a lot like an empty cache. Tasks can rely on caches being
    # swapped in and out on any volume without any noticeable change
    # of behavior.
    volume_files = os.listdir(volume)
    if volume_files:
        print(NON_EMPTY_VOLUME % volume)
        print('entries in root directory: %s' %
              ' '.join(sorted(volume_files)))
        sys.exit(1)

    # The volume is almost certainly owned by root:root. Chown it so it
    # is writable.

    if running_as_root:
        print_line(b'volume', b'changing ownership of volume %s '
                              b'to %d:%d\n' % (volume.encode('utf-8'),
                                               user.pw_uid,
                                               group.gr_gid))
        set_dir_permissions(volume, user.pw_uid, group.gr_gid)


def vcs_checkout(source_repo, dest, store_path,
                 base_repo=None, revision=None, branch=None,
                 fetch_hgfingerprint=False, sparse_profile=None):
    # Specify the method used to check out a revision. This defaults to
    # revisions as SHA-1 strings, but also supports symbolic revisions like
    # `tip` via the branch flag.
    if revision:
        revision_flag = '--revision'
        revision_value = revision
    elif branch:
        revision_flag = '--branch'
        revision_value = branch
    else:
        print('revision is not specified for checkout')
        sys.exit(1)

    if IS_MACOSX or IS_POSIX:
        hg_bin = 'hg'
    elif IS_WINDOWS:
        # This is where OCC installs it in the AMIs.
        hg_bin = r'C:\Program Files\Mercurial\hg.exe'
        if not os.path.exists(hg_bin):
            print('could not find Mercurial executable: %s' % hg_bin)
            sys.exit(1)

    store_path = os.path.abspath(store_path)
    args = [
        hg_bin,
        'robustcheckout',
        '--sharebase', store_path,
        '--purge',
    ]

    # Obtain certificate fingerprints. Without this, the checkout will use the
    # fingerprint on the system, which is managed some other way (such as
    # puppet).
    if fetch_hgfingerprint:
        try:
            print_line(b'vcs', b'fetching hg.mozilla.org fingerprint from %s\n' %
                       FINGERPRINT_URL.encode('utf-8'))
            res = urllib.request.urlopen(FINGERPRINT_URL, timeout=10)
            secret = res.read()
            try:
                secret = json.loads(secret.decode('utf-8'))
            except ValueError:
                print_line(b'vcs', b'invalid JSON in hg fingerprint secret\n')
                sys.exit(1)
        except (urllib.error.URLError, socket.timeout):
            print_line(b'vcs', b'Unable to retrieve current hg.mozilla.org '
                               b'fingerprint using the secret service; using '
                               b'fallback instead.\n')
            # XXX This fingerprint will not be accurate if running on an old
            #     revision after the server fingerprint has changed.
            secret = {'secret': FALLBACK_FINGERPRINT}

        hgmo_fingerprint = secret['secret']['fingerprints']
        args.extend([
            '--config', 'hostsecurity.hg.mozilla.org:fingerprints=%s' % hgmo_fingerprint,
        ])

    if base_repo:
        args.extend(['--upstream', base_repo])
    if sparse_profile:
        args.extend(['--sparseprofile', sparse_profile])

    dest = os.path.abspath(dest)
    args.extend([
        revision_flag, revision_value,
        source_repo, dest,
    ])

    res = run_and_prefix_output(b'vcs', args,
                                extra_env={'PYTHONUNBUFFERED': '1'})
    if res:
        sys.exit(res)

    # Update the current revision hash and ensure that it is well formed.
    revision = subprocess.check_output(
        [hg_bin, 'log',
         '--rev', '.',
         '--template', '{node}'],
        cwd=dest,
        # Triggers text mode on Python 3.
        universal_newlines=True)

    assert re.match('^[a-f0-9]{40}$', revision)

    msg = ("TinderboxPrint:<a href={source_repo}/rev/{revision} "
           "title='Built from {repo_name} revision {revision}'>"
           "{revision}</a>\n".format(revision=revision,
                                     source_repo=source_repo,
                                     repo_name=source_repo.split('/')[-1]))

    print_line(b'vcs', msg.encode('utf-8'))

    return revision
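
# For a typical gecko checkout, the command assembled above is along the
# lines of (paths and revision illustrative):
#
#     hg robustcheckout --sharebase /builds/worker/checkouts/hg-store --purge \
#         --upstream https://hg.mozilla.org/mozilla-unified \
#         --revision 0123456789abcdef0123456789abcdef01234567 \
#         https://hg.mozilla.org/mozilla-central /builds/worker/checkouts/gecko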


def fetch_artifacts():
    print_line(b'fetches', b'fetching artifacts\n')

    fetch_content = shutil.which('fetch-content')
    if not fetch_content and os.environ.get('GECKO_PATH'):
        fetch_content = os.path.join(os.environ['GECKO_PATH'], 'taskcluster',
                                     'scripts', 'misc', 'fetch-content')

    if not fetch_content or not os.path.isfile(fetch_content):
        fetch_content = os.path.join(os.path.dirname(__file__),
                                     'fetch-content')

    if not os.path.isfile(fetch_content):
        print(FETCH_CONTENT_NOT_FOUND)
        sys.exit(1)

    cmd = [sys.executable, '-u', fetch_content, 'task-artifacts']
    res = run_and_prefix_output(b'fetches', cmd)
    if res:
        sys.exit(res)

    print_line(b'fetches', b'finished fetching artifacts\n')


def add_vcs_arguments(parser, project, name):
    """Adds arguments to ArgumentParser to control VCS options for a project."""

    parser.add_argument('--%s-checkout' % project,
                        help='Directory where %s checkout should be created' %
                             name)
    parser.add_argument('--%s-sparse-profile' % project,
                        help='Path to sparse profile for %s checkout' % name)


def resolve_checkout_url(base_repo, head_repo):
    """Resolve the Mercurial URL to perform a checkout against, either the
    public hg.mozilla.org service or a CI-only regional mirror.

    The config will be of the form:
        {
            "aws/us-west-2": {  # key built from `TASKCLUSTER_WORKER_LOCATION` variable
                "rate": 0.5,
                "domain": "us-west-2.hgmointernal.net"
            },
            "google/us-central1": {...}
        }
    """
    worker_location = os.getenv('TASKCLUSTER_WORKER_LOCATION')
    if not worker_location:
        print_line(b'vcs', b'TASKCLUSTER_WORKER_LOCATION environment variable not set; '
                           b'using public hg.mozilla.org service\n')
        return base_repo, head_repo

    try:
        worker_location = json.loads(worker_location)
    except json.JSONDecodeError:
        print_line(b'vcs', b'Could not decode TASKCLUSTER_WORKER_LOCATION environment variable '
                           b'as JSON. Content: %s\n' % worker_location.encode('utf-8'))
        print_line(b'vcs', b'using public hg.mozilla.org service\n')
        return base_repo, head_repo

    if 'cloud' not in worker_location or 'region' not in worker_location:
        print_line(b'vcs', b'TASKCLUSTER_WORKER_LOCATION missing required keys; '
                           b'using public hg.mozilla.org service\n')
        return base_repo, head_repo

    config_key = '%(cloud)s/%(region)s' % worker_location

    try:
        print_line(b'vcs', b'fetching hgmointernal config from %s\n' %
                   HGMOINTERNAL_CONFIG_URL.encode('utf-8'))

        # Get the hgmointernal config Taskcluster secret
        res = urllib.request.urlopen(HGMOINTERNAL_CONFIG_URL, timeout=10)
        hgmointernal_config = json.loads(res.read().decode('utf-8'))['secret']

        # Use public hg service if region not yet supported
        if config_key not in hgmointernal_config:
            print_line(b'vcs', b'region %s not yet supported; using public '
                               b'hg.mozilla.org service\n' % config_key.encode('utf-8'))

            return base_repo, head_repo

        # Only send a percentage of traffic to the internal mirror
        rate = float(hgmointernal_config[config_key]['rate'])

        if random.random() > rate:
            print_line(b'vcs', b'hgmointernal rate miss; using '
                               b'public hg.mozilla.org service\n')
            return base_repo, head_repo

        print_line(b'vcs', b'hgmointernal rate hit; cloning from '
                           b'private hgweb mirror\n')

        mirror_domain = hgmointernal_config[config_key]['domain']

        if base_repo and base_repo.startswith('https://hg.mozilla.org'):
            base_repo = base_repo.replace('hg.mozilla.org', mirror_domain, 1)

        if head_repo and head_repo.startswith('https://hg.mozilla.org'):
            head_repo = head_repo.replace('hg.mozilla.org', mirror_domain, 1)

        return base_repo, head_repo

    except (KeyError, ValueError):
        print_line(b'vcs', b'invalid JSON in hgmointernal config; '
                           b'falling back to public hg.mozilla.org service\n')

    except (urllib.error.URLError, socket.timeout):
        print_line(b'vcs', b'Unable to retrieve hgmointernal config using '
                           b'the secret service; falling back to public hg.mozilla.org '
                           b'service\n')

    return base_repo, head_repo
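
# For example, with TASKCLUSTER_WORKER_LOCATION describing cloud "aws" and
# region "us-west-2", and a config entry mapping "aws/us-west-2" to the domain
# "us-west-2.hgmointernal.net" (per the docstring above), a rate hit rewrites:
#
#     https://hg.mozilla.org/mozilla-unified
#         -> https://us-west-2.hgmointernal.net/mozilla-unified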


def collect_vcs_options(args, project):
    checkout = getattr(args, '%s_checkout' % project)
    sparse_profile = getattr(args, '%s_sparse_profile' % project)

    env_prefix = project.upper()

    base_repo = os.environ.get('%s_BASE_REPOSITORY' % env_prefix)
    head_repo = os.environ.get('%s_HEAD_REPOSITORY' % env_prefix)
    revision = os.environ.get('%s_HEAD_REV' % env_prefix)
    branch = os.environ.get('%s_HEAD_REF' % env_prefix)

    store_path = os.environ.get('HG_STORE_PATH')

    # Expand ~ in some paths.
    if checkout:
        checkout = os.path.expanduser(checkout)
    if store_path:
        store_path = os.path.expanduser(store_path)

    # Some callers set the base repository to mozilla-central for historical
    # reasons. Switch to mozilla-unified because robustcheckout works best
    # with it.
    if base_repo == 'https://hg.mozilla.org/mozilla-central':
        base_repo = 'https://hg.mozilla.org/mozilla-unified'

    # No need to check the hgmointernal config if we aren't performing
    # a checkout.
    if checkout:
        base_repo, head_repo = resolve_checkout_url(base_repo, head_repo)

    return {
        'store-path': store_path,
        'project': project,
        'env-prefix': env_prefix,
        'checkout': checkout,
        'sparse-profile': sparse_profile,
        'base-repo': base_repo,
        'head-repo': head_repo,
        'revision': revision,
        'branch': branch,
    }
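
# For project='gecko', the environment variables consulted above are
# GECKO_BASE_REPOSITORY, GECKO_HEAD_REPOSITORY, GECKO_HEAD_REV, GECKO_HEAD_REF
# and HG_STORE_PATH.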


def vcs_checkout_from_args(args, project):
    options = collect_vcs_options(args, project)

    if not options['checkout']:
        if options['branch'] and not options['revision']:
            print('task should be defined in terms of non-symbolic revision')
            sys.exit(1)
        return

    os.environ['%s_HEAD_REV' % options['env-prefix']] = vcs_checkout(
        options['head-repo'],
        options['checkout'],
        options['store-path'],
        base_repo=options['base-repo'],
        revision=options['revision'],
        fetch_hgfingerprint=args.fetch_hgfingerprint,
        branch=options['branch'],
        sparse_profile=options['sparse-profile'])


def maybe_run_resource_monitoring():
    """Run the resource monitor if available.

    Discussion in https://github.com/taskcluster/taskcluster-rfcs/pull/160
    and https://bugzil.la/1648051
    """
    if 'MOZ_FETCHES' not in os.environ:
        return
    if 'RESOURCE_MONITOR_OUTPUT' not in os.environ:
        return

    prefix = b'resource_monitor'

    executable = '{}/resource-monitor/resource-monitor{}'.format(
        os.environ.get('MOZ_FETCHES_DIR'), '.exe' if IS_WINDOWS else '')

    if not os.path.exists(executable) or not os.access(executable, os.X_OK):
        print_line(prefix, b"%s not executable\n" % executable.encode('utf-8'))
        return
    args = [
        executable,
        '-process',
        str(os.getpid()),
        '-output',
        os.environ["RESOURCE_MONITOR_OUTPUT"],
    ]
    print_line(prefix, b"Resource monitor starting: %s\n" % str(args).encode('utf-8'))
    # Avoid environment variables the payload doesn't need.
    del os.environ['RESOURCE_MONITOR_OUTPUT']

    # Without CREATE_NEW_PROCESS_GROUP, Windows signals will attempt to kill
    # run-task, too.
    process = subprocess.Popen(args,
                               bufsize=0,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if IS_WINDOWS else 0,
                               cwd=os.getcwd())

    def capture_output():
        fh = io.TextIOWrapper(process.stdout, encoding='latin1')
        while True:
            data = fh.readline().encode('latin1')
            if data == b'':
                break
            print_line(prefix, data)

    monitor_process = Thread(target=capture_output)
    monitor_process.start()
    return process
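
# Sketch of the trigger and the resulting invocation (paths hypothetical):
#
#     MOZ_FETCHES_DIR=/builds/worker/fetches
#     RESOURCE_MONITOR_OUTPUT=/builds/worker/monitoring/resources.json
#
# would run:
#
#     /builds/worker/fetches/resource-monitor/resource-monitor \
#         -process <run-task pid> -output /builds/worker/monitoring/resources.json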


def main(args):
    print_line(b'setup', b'run-task started in %s\n' % os.getcwd().encode('utf-8'))
    running_as_root = IS_POSIX and os.getuid() == 0

    # Set a reasonable limit on the number of open files.
    # Running under docker inherits the system defaults, which are not subject
    # to the "standard" limits set by pam_limits.so, and while they work well
    # for servers that may receive a lot of connections, they cause performance
    # problems for things that close file descriptors before forking (for good
    # reasons), like python's `subprocess.Popen(..., close_fds=True)` (and while
    # the default was close_fds=False in python2, that changed in python3).
    # In some cases, Firefox does the same thing when spawning subprocesses.
    # Processes spawned by this one will inherit the limit set here.
    try:
        import resource
        # Keep the hard limit the same, though, allowing processes to change
        # their soft limit if they need to (Firefox does, for instance).
        (soft, hard) = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = os.environ.get('MOZ_LIMIT_NOFILE')
        if limit:
            limit = int(limit)
        else:
            # If no explicit limit is given, use 1024 if it's less than the
            # current soft limit. For instance, the default on macOS is 256,
            # so we'd pick that rather than 1024.
            limit = min(soft, 1024)
        # Now apply the limit, if it's different from the original one.
        if limit != soft:
            resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))
    except ImportError:
        # The resource module is UNIX only.
        pass

    # Arguments up to '--' are ours; arguments after it are for the main task
    # to be executed.
    try:
        i = args.index('--')
        our_args = args[0:i]
        task_args = args[i + 1:]
    except ValueError:
        our_args = args
        task_args = []

    parser = argparse.ArgumentParser()
    parser.add_argument('--user', default='worker', help='user to run as')
    parser.add_argument('--group', default='worker', help='group to run as')
    parser.add_argument('--task-cwd', help='directory to run the provided command in')

    add_vcs_arguments(parser, 'gecko', 'Firefox')
    add_vcs_arguments(parser, 'comm', 'Comm')

    parser.add_argument('--fetch-hgfingerprint', action='store_true',
                        help='Fetch the latest hgfingerprint from the secrets store, '
                        'using the taskclusterProxy')

    args = parser.parse_args(our_args)

    uid = gid = gids = None
    if IS_POSIX and running_as_root:
        user, group, gids = get_posix_user_group(args.user, args.group)
        uid = user.pw_uid
        gid = group.gr_gid

    if running_as_root and os.path.exists("/dev/kvm"):
        # Ensure kvm permissions for worker, required for Android x86
        st = os.stat("/dev/kvm")
        os.chmod("/dev/kvm", st.st_mode | 0o666)

    # Validate caches.
    #
    # Taskgraph should pass in a list of paths that are caches via an
    # environment variable (which we don't want to pass down to child
    # processes).

    if 'TASKCLUSTER_CACHES' in os.environ:
        caches = os.environ['TASKCLUSTER_CACHES'].split(';')
        del os.environ['TASKCLUSTER_CACHES']
    else:
        caches = []

    if 'TASKCLUSTER_UNTRUSTED_CACHES' in os.environ:
        untrusted_caches = True
        del os.environ['TASKCLUSTER_UNTRUSTED_CACHES']
    else:
        untrusted_caches = False

    for cache in caches:
        if not os.path.isdir(cache):
            print('error: cache %s is not a directory; this should never '
                  'happen' % cache)
            return 1

        if running_as_root:
            purge = configure_cache_posix(cache, user, group, untrusted_caches,
                                          running_as_root)

            if purge:
                return EXIT_PURGE_CACHE

    if 'TASKCLUSTER_VOLUMES' in os.environ:
        volumes = os.environ['TASKCLUSTER_VOLUMES'].split(';')
        del os.environ['TASKCLUSTER_VOLUMES']
    else:
        volumes = []

    if volumes and not IS_POSIX:
        print('assertion failed: volumes not expected on Windows')
        return 1

    # Sanitize volumes.
    for volume in volumes:
        # If a volume is a cache, it was dealt with above.
        if volume in caches:
            print_line(b'volume', b'volume %s is a cache\n' %
                       volume.encode('utf-8'))
            continue

        if running_as_root:
            configure_volume_posix(volume, user, group, running_as_root)

    all_caches_and_volumes = set(map(os.path.normpath, caches))
    all_caches_and_volumes |= set(map(os.path.normpath, volumes))

    def path_in_cache_or_volume(path):
        path = os.path.normpath(path)

        while path:
            if path in all_caches_and_volumes:
                return True

            path, child = os.path.split(path)
            if not child:
                break

        return False

    def prepare_checkout_dir(checkout):
        if not checkout:
            return

        # The checkout path becomes the working directory. Since there are
        # special cache files in the cache's root directory and working
        # directory purging could blow them away, disallow this scenario.
        if os.path.exists(os.path.join(checkout, '.cacherequires')):
            print('error: cannot perform vcs checkout into cache root: %s' %
                  checkout)
            sys.exit(1)

        # TODO given the performance implications, consider making this a fatal
        # error.
        if not path_in_cache_or_volume(checkout):
            print_line(b'vcs', b'WARNING: vcs checkout path (%s) not in cache '
                               b'or volume; performance will likely suffer\n' %
                               checkout.encode('utf-8'))

        # Ensure the directory for the source checkout exists.
        try:
            os.makedirs(os.path.dirname(checkout))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # And that it is owned by the appropriate user/group.
        if running_as_root:
            os.chown(os.path.dirname(checkout), uid, gid)

    def prepare_hg_store_path():
        # And ensure the shared store path exists and has proper permissions.
        if 'HG_STORE_PATH' not in os.environ:
            print('error: HG_STORE_PATH environment variable not set')
            sys.exit(1)

        store_path = os.environ['HG_STORE_PATH']

        if not path_in_cache_or_volume(store_path):
            print_line(b'vcs', b'WARNING: HG_STORE_PATH (%s) not in cache or '
                               b'volume; performance will likely suffer\n' %
                               store_path.encode('utf-8'))

        try:
            os.makedirs(store_path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        if running_as_root:
            os.chown(store_path, uid, gid)

    prepare_checkout_dir(args.gecko_checkout)
    if args.gecko_checkout or args.comm_checkout:
        prepare_hg_store_path()

    if IS_POSIX and running_as_root:
        # Drop permissions to the requested user.
        # This code is modeled after what `sudo` was observed to do in a Docker
        # container. We do not bother calling setrlimit() because containers
        # have their own limits.
        print_line(b'setup', b'running as %s:%s\n' % (
            args.user.encode('utf-8'), args.group.encode('utf-8')))

        os.setgroups(gids)
        os.umask(0o22)
        os.setresgid(gid, gid, gid)
        os.setresuid(uid, uid, uid)

    vcs_checkout_from_args(args, 'gecko')
    vcs_checkout_from_args(args, 'comm')

    resource_process = None

    try:
        for k in ('GECKO_PATH', 'MOZ_FETCHES_DIR', 'UPLOAD_DIR'):
            if k in os.environ:
                # Normalize paths to use forward slashes. Some shell scripts
                # tolerate that better on Windows.
                os.environ[k] = os.path.abspath(os.environ[k]).replace(os.sep, '/')
                print_line(b'setup', b'%s is %s\n' % (
                    k.encode('utf-8'),
                    os.environ[k].encode('utf-8')))

        if 'MOZ_FETCHES' in os.environ:
            fetch_artifacts()

        resource_process = maybe_run_resource_monitoring()

        return run_and_prefix_output(b'task', task_args, cwd=args.task_cwd)
    finally:
        if resource_process:
            print_line(b'resource_monitor', b'terminating\n')
            if IS_WINDOWS:
                # .terminate() on Windows is not a graceful shutdown, due to
                # differences in signals. CTRL_BREAK_EVENT will work provided
                # the subprocess is in a different process group, so this script
                # isn't also killed.
                os.kill(resource_process.pid, signal.CTRL_BREAK_EVENT)
            else:
                resource_process.terminate()
            resource_process.wait()


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))