fune/taskcluster/taskgraph/util/schema.py
Nick Thomas 0b738d5db5 Bug 1471767 - taskcluster documentation fixes, r=dustin
Assorted fixes from trawling the sphinx logs - malformed formatting, broken references, leftovers from renaming action-task to action-callback and removing
yaml-templates, docstring fixes to make sphinx happier, and typos.

MozReview-Commit-ID: 6jUOljdLoE2

--HG--
extra : rebase_source : f2a9dbcde5180f760a80f47691a70eba76e58bad
2018-06-27 21:48:10 +12:00

219 lines
7.7 KiB
Python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals
import re
import pprint
import collections
import voluptuous
import taskgraph
from mozbuild import schedules
from .attributes import keymatch
def validate_schema(schema, obj, msg_prefix):
    """
    Check `obj` against `schema`, raising a readable error on failure.

    Validation is skipped entirely when `taskgraph.fast` is set.  Otherwise
    `schema` is called with `obj`; if that raises
    `voluptuous.MultipleInvalid`, a plain `Exception` is raised whose message
    is `msg_prefix`, followed by one line per individual validation error,
    followed by a pretty-printed dump of `obj`.
    """
    if not taskgraph.fast:
        try:
            schema(obj)
        except voluptuous.MultipleInvalid as exc:
            # one line for the prefix, then one per underlying error
            lines = [msg_prefix] + [str(err) for err in exc.errors]
            raise Exception('\n'.join(lines) + '\n' + pprint.pformat(obj))
def optionally_keyed_by(*arguments):
    """
    Build a schema for a value that may optionally be keyed by other fields.

    The final positional argument is the schema of the plain value; every
    argument before it is the name of a field the value may be keyed by.
    For example:

        'some-value': optionally_keyed_by(
            'test-platform', 'build-platform',
            Any('a', 'b', 'c'))

    The resulting schema accepts `by-test-platform` and `by-build-platform`
    wrappers, nested in either order.

    Nesting is bounded rather than infinite: the schema is wrapped once per
    positional argument (i.e. once per field name, plus once for the trailing
    schema argument), each pass allowing one more `by-<field>` level.
    """
    fields, schema = arguments[:-1], arguments[-1]

    for _ in arguments:
        # each pass produces Any(schema, {by-fld1: ...}, {by-fld2: ...}, ...)
        # built on top of the schema from the previous pass
        keyed_alternatives = [
            {'by-' + field: {basestring: schema}} for field in fields
        ]
        schema = voluptuous.Any(schema, *keyed_alternatives)
    return schema
def resolve_keyed_by(item, field, item_name, **extra_values):
    """
    For values which can either accept a literal value, or be keyed by some
    other attribute of the item, perform that lookup and replacement in-place
    (modifying `item` directly).  The field is specified using dotted notation
    to traverse dictionaries.

    The attribute named by `by-<attr>` is looked up in `extra_values` first
    and otherwise at the top level of `item` (not relative to `field`).

    For example, given item::

        test-platform: linux128
        job:
            chunks:
                by-test-platform:
                    macosx-10.11/debug: 13
                    win.*: 6
                    default: 12

    a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])`
    would mutate item in-place to::

        test-platform: linux128
        job:
            chunks: 12

    The `item_name` parameter is used to generate useful error messages.

    If extra_values are supplied, they represent additional values available
    for reference from by-<field>.

    Items can be nested as deeply as the schema will allow::

        chunks:
            by-test-platform:
                win.*:
                    by-project:
                        ash: ..
                        cedar: ..
                linux: 13
                default: 12

    Returns `item` (the same object that was passed in).  If `field` cannot
    be found, or its value is not a single-entry `by-*` dictionary, `item` is
    returned unchanged.

    Raises `Exception` if the only alternative given is `default`, if more
    than one alternative matches, or if no alternative matches.  Raises
    `KeyError` if the keyed-by attribute is in neither `extra_values` nor
    `item`.
    """
    # find the field, returning the item unchanged if anything goes wrong
    container, subfield = item, field
    while '.' in subfield:
        f, subfield = subfield.split('.', 1)
        if f not in container:
            return item
        container = container[f]
        if not isinstance(container, dict):
            return item

    if subfield not in container:
        return item

    value = container[subfield]
    while True:
        # only a single-entry dict can be a `by-<attr>` wrapper; anything
        # else is already a resolved (literal) value
        if not isinstance(value, dict) or len(value) != 1:
            return item

        # fetch the sole (key, value) pair without indexing into keys() /
        # values(), which are non-subscriptable views on Python 3
        by_key, alternatives = next(iter(value.items()))
        if not by_key.startswith('by-'):
            return item

        keyed_by = by_key[3:]  # strip off 'by-' prefix
        key = extra_values[keyed_by] if keyed_by in extra_values else item[keyed_by]

        if len(alternatives) == 1 and 'default' in alternatives:
            # Error out when only 'default' is specified as only alternatives,
            # because we don't need to by-{keyed_by} there.
            raise Exception(
                "Keyed-by '{}' unnecessary with only value 'default' "
                "found, when determining item '{}' in '{}'".format(
                    keyed_by, field, item_name))

        matches = keymatch(alternatives, key)
        if len(matches) > 1:
            raise Exception(
                "Multiple matching values for {} {!r} found while "
                "determining item {} in {}".format(
                    keyed_by, key, field, item_name))
        if not matches:
            raise Exception(
                "No {} matching {!r} nor 'default' found while determining item {} in {}".format(
                    keyed_by, key, field, item_name))

        # store the chosen alternative and loop again: it may itself be
        # another by-* wrapper
        value = container[subfield] = matches[0]
# Schemas for YAML files should use dashed identifiers by default.  If there are
# components of the schema for which there is a good reason to use another format,
# they can be whitelisted here.
#
# Each entry is a predicate taking the path string that check_schema builds while
# walking a schema ("schema" followed by one "[<repr of key>]" segment per nested
# key) and returning a true value if identifiers at that path are exempt from the
# naming check.
WHITELISTED_SCHEMA_IDENTIFIERS = [
    # upstream-artifacts are handed directly to scriptWorker, which expects interCaps
    # (the u'' prefix matches the repr of a unicode key as it appears in the path)
    lambda path: "[u'upstream-artifacts']" in path,
]
def check_schema(schema):
    """
    Verify that the dictionary keys used in `schema` follow the YAML naming
    convention (lower-case, dash-separated identifiers).

    `schema.schema` is walked recursively through mappings, lists/tuples and
    `voluptuous.Any` alternatives.  A `RuntimeError` is raised for any string
    key that does not match the convention, or any key of an unexpected type,
    unless the path to that key is accepted by one of the predicates in
    WHITELISTED_SCHEMA_IDENTIFIERS.
    """
    identifier_re = re.compile('^[a-z][a-z0-9-]*$')

    def is_whitelisted(path):
        for predicate in WHITELISTED_SCHEMA_IDENTIFIERS:
            if predicate(path):
                return True
        return False

    def check_key(path, key):
        # the `basestring` type itself and voluptuous.Extra are wildcard
        # keys, not identifiers, so there is nothing to check
        if key in (basestring, voluptuous.Extra):
            return

        if isinstance(key, basestring):
            if identifier_re.match(key) or is_whitelisted(path):
                return
            raise RuntimeError(
                'YAML schemas should use dashed lower-case identifiers, '
                'not {!r} @ {}'.format(key, path))

        # Optional/Required markers wrap the real key
        if isinstance(key, (voluptuous.Optional, voluptuous.Required)):
            check_key(path, key.schema)
            return

        # every alternative of an Any key must itself be acceptable
        if isinstance(key, voluptuous.Any):
            for alternative in key.validators:
                check_key(path, alternative)
            return

        if not is_whitelisted(path):
            raise RuntimeError(
                'Unexpected type in YAML schema: {} @ {}'.format(
                    type(key).__name__, path))

    def walk(path, sch):
        if isinstance(sch, collections.Mapping):
            for key, sub_schema in sch.iteritems():
                child = "{}[{!r}]".format(path, key)
                check_key(child, key)
                walk(child, sub_schema)
        elif isinstance(sch, (list, tuple)):
            for index, sub_schema in enumerate(sch):
                walk("{}[{}]".format(path, index), sub_schema)
        elif isinstance(sch, voluptuous.Any):
            # alternatives share the path of the Any they belong to
            for sub_schema in sch.validators:
                walk(path, sub_schema)

    walk('schema', schema.schema)
def Schema(*args, **kwargs):
    """
    Drop-in replacement for voluptuous.Schema: all arguments are forwarded to
    it unchanged, and the resulting schema is additionally run through the
    taskgraph-specific `check_schema` checks before being returned.
    """
    checked = voluptuous.Schema(*args, **kwargs)
    check_schema(checked)
    return checked
# Schema for a task's optimization setting: either None or a dictionary with
# exactly one of the keys listed below.
OptimizationSchema = voluptuous.Any(
    # always run this task (default)
    None,
    # search the index for the given index namespaces, and replace this task if found
    # the search occurs in order, with the first match winning
    {'index-search': [basestring]},
    # consult SETA and skip this task if it is low-value
    {'seta': None},
    # skip this task if none of the given file patterns match
    {'skip-unless-changed': [basestring]},
    # skip this task unless the changed files' SCHEDULES contains any of these components
    {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
    # skip if SETA or skip-unless-schedules says to
    {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)},
    # only run this task if its dependencies will run (useful for follow-on tasks that
    # are unnecessary if the parent tasks are not run)
    {'only-if-dependencies-run': None}
)