forked from mirrors/gecko-dev
# ignore-this-changeset Differential Revision: https://phabricator.services.mozilla.com/D162670
1009 lines
36 KiB
Python
1009 lines
36 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
import atexit
|
|
import collections
|
|
import itertools
|
|
import json
|
|
import math
|
|
import os
|
|
import re
|
|
import runpy
|
|
import sys
|
|
from collections import OrderedDict
|
|
from ctypes import c_int
|
|
|
|
from . import shared_telemetry_utils as utils
|
|
from .shared_telemetry_utils import ParserError
|
|
|
|
atexit.register(ParserError.exit_func)
|
|
|
|
# Constants.

# Limits applied to the "labels" list of a histogram (see
# Histogram.check_label_values).
MAX_LABEL_LENGTH = 20
MAX_LABEL_COUNT = 100
# Limits applied to the "keys" list of a keyed histogram (see
# Histogram.check_keys_field).
MAX_KEY_COUNT = 30
MAX_KEY_LENGTH = 20
# Lower bound on the number of values of a categorical histogram (see
# Histogram.categorical_bucket_parameters).
MIN_CATEGORICAL_BUCKET_COUNT = 50
# Pattern histogram names and label values are matched against; callers in
# this file pass re.IGNORECASE, so upper-case identifiers match as well.
CPP_IDENTIFIER_PATTERN = "^[a-z][a-z0-9_]+[a-z0-9]$"

# Definition keys accepted for every histogram kind; kind-specific keys are
# added on top of these in Histogram.verify_attributes.
ALWAYS_ALLOWED_KEYS = [
    "kind",
    "description",
    "operating_systems",
    "expires_in_version",
    "alert_emails",
    "keyed",
    "releaseChannelCollection",
    "bug_numbers",
    "keys",
    "record_in_processes",
    "record_into_store",
    "products",
]

# Documentation links embedded in error messages.
BASE_DOC_URL = (
    "https://firefox-source-docs.mozilla.org/toolkit/components/" "telemetry/telemetry/"
)
HISTOGRAMS_DOC_URL = BASE_DOC_URL + "collection/histograms.html"
SCALARS_DOC_URL = BASE_DOC_URL + "collection/scalars.html"

# Histogram kinds accepted for products for which
# utils.is_geckoview_streaming_product() is true (see Histogram.check_products).
GECKOVIEW_STREAMING_SUPPORTED_KINDS = [
    "linear",
    "exponential",
    "categorical",
]
|
|
|
|
# parse_histograms.py is used by scripts from a mozilla-central build tree
# and also by outside consumers, such as the telemetry server.  We need
# to ensure that importing things works in both contexts.  Therefore,
# unconditionally importing things that are local to the build tree, such
# as buildconfig, is a no-no.
try:
    import buildconfig

    # Need to update sys.path to be able to find usecounters.
    # The usecounters module itself is imported further down in this file.
    sys.path.append(os.path.join(buildconfig.topsrcdir, "dom/base/"))
except ImportError:
    # Must be in an out-of-tree usage scenario.  Trust that whoever is
    # running this script knows we need the usecounters module and has
    # ensured it's in our sys.path.
    pass
|
|
|
|
|
|
def linear_buckets(dmin, dmax, n_buckets):
    """Return the lower bounds of the buckets of a linear histogram.

    The first entry is always 0; the remaining n_buckets - 1 entries are
    spread evenly between dmin and dmax (both inclusive) and rounded to the
    nearest integer.
    """
    lo = float(dmin)
    hi = float(dmax)
    bounds = [0] * n_buckets
    # Slice assignment keeps the leading 0 and leaves very short lists
    # (n_buckets <= 1) untouched, exactly like an index loop from 1.
    bounds[1:] = [
        int((lo * (n_buckets - 1 - pos) + hi * (pos - 1)) / (n_buckets - 2) + 0.5)
        for pos in range(1, n_buckets)
    ]
    return bounds
|
|
|
|
|
|
def exponential_buckets(dmin, dmax, n_buckets):
    """Return the lower bounds of the buckets of an exponential histogram.

    Entry 0 is always 0 and entry 1 is dmin.  Every following bound is
    chosen so that the remaining bounds are evenly spaced on a logarithmic
    scale up to dmax, while always growing by at least 1.
    """
    log_max = math.log(dmax)
    bounds = [0] * n_buckets
    previous = dmin
    bounds[1] = previous
    for index in range(2, n_buckets):
        log_previous = math.log(previous)
        # Split what is left of the log range evenly among remaining buckets.
        step = (log_max - log_previous) / (n_buckets - index)
        candidate = int(math.floor(math.exp(log_previous + step) + 0.5))
        # Bounds must be strictly increasing.
        previous = candidate if candidate > previous else previous + 1
        bounds[index] = previous
    return bounds
|
|
|
|
|
|
# Mapping of allowlist name -> set of histogram names, or None when the
# allowlists have not been (or could not be) loaded.
allowlists = None


def load_allowlist():
    """Populate the module-level `allowlists` from histogram-allowlists.json.

    Every list found in the JSON file is converted to a set.  A file that
    cannot be opened or parsed is reported through ParserError.handle_now().
    """
    global allowlists
    try:
        # The parsers live in build_scripts/parsers in the Telemetry module,
        # while the histogram-allowlists file lives in the root of the module,
        # i.e. two directories up from this file.
        # NOTE: if the parsers are moved, this logic will need to be updated.
        this_dir = os.path.realpath(os.path.dirname(__file__))
        module_root = os.path.abspath(os.path.join(this_dir, os.pardir, os.pardir))
        allowlist_path = os.path.join(module_root, "histogram-allowlists.json")

        with open(allowlist_path, "r") as allowlist_file:
            try:
                allowlists = json.load(allowlist_file)
                for list_name, entries in allowlists.items():
                    allowlists[list_name] = set(entries)
            except ValueError:
                ParserError("Error parsing allowlist: %s" % allowlist_path).handle_now()
    except IOError:
        allowlists = None
        ParserError("Unable to parse allowlist: %s." % allowlist_path).handle_now()
|
|
|
|
|
|
class Histogram:
    """A class for representing a histogram definition."""

    def __init__(self, name, definition, strict_type_checks=False):
        """Initialize a histogram named name with the given definition.

        definition is a dict-like object that must contain at least the keys:

        - 'kind': The kind of histogram.  Must be one of 'boolean', 'flag',
          'count', 'enumerated', 'categorical', 'linear', or 'exponential'.
        - 'description': A textual description of the histogram.

        strict_type_checks is a boolean indicating whether to use the new,
        stricter type checks.  The server-side still has to deal with old,
        oddly typed submissions, so we have to skip them there by default.

        Note that definition is modified in place: defaults are filled in
        for use counters, the expiration version is normalized and, without
        strict type checks, some legacy values are coerced.
        """
        self._strict_type_checks = strict_type_checks
        self._is_use_counter = name.startswith("USE_COUNTER2_")
        if self._is_use_counter:
            definition.setdefault("record_in_processes", ["main", "content"])
            definition.setdefault("releaseChannelCollection", "opt-out")
            definition.setdefault("products", ["firefox", "fennec"])
        self.verify_attributes(name, definition)
        self._name = name
        self._description = definition["description"]
        self._kind = definition["kind"]
        self._keys = definition.get("keys", [])
        self._keyed = definition.get("keyed", False)
        self._expiration = definition.get("expires_in_version")
        self._labels = definition.get("labels", [])
        self._record_in_processes = definition.get("record_in_processes")
        self._record_into_store = definition.get("record_into_store", ["main"])
        self._products = definition.get("products")
        self._operating_systems = definition.get("operating_systems", ["all"])

        self.compute_bucket_parameters(definition)
        self.set_nsITelemetry_kind()
        self.set_dataset(definition)

    def name(self):
        """Return the name of the histogram."""
        return self._name

    def description(self):
        """Return the description of the histogram."""
        return self._description

    def kind(self):
        """Return the kind of the histogram.
        Will be one of 'boolean', 'flag', 'count', 'enumerated', 'categorical', 'linear',
        or 'exponential'."""
        return self._kind

    def expiration(self):
        """Return the expiration version of the histogram."""
        return self._expiration

    def nsITelemetry_kind(self):
        """Return the nsITelemetry constant corresponding to the kind of
        the histogram."""
        return self._nsITelemetry_kind

    def low(self):
        """Return the lower bound of the histogram."""
        return self._low

    def high(self):
        """Return the high bound of the histogram."""
        return self._high

    def n_buckets(self):
        """Return the number of buckets in the histogram."""
        return self._n_buckets

    def keyed(self):
        """Returns True if this a keyed histogram, false otherwise."""
        return self._keyed

    def keys(self):
        """Returns a list of allowed keys for keyed histogram, [] for others."""
        return self._keys

    def dataset(self):
        """Returns the dataset this histogram belongs into."""
        return self._dataset

    def labels(self):
        """Returns a list of labels for a categorical histogram, [] for others."""
        return self._labels

    def record_in_processes(self):
        """Returns a list of processes this histogram is permitted to record in."""
        return self._record_in_processes

    def record_in_processes_enum(self):
        """Get the non-empty list of flags representing the processes to record data in"""
        return [utils.process_name_to_enum(p) for p in self.record_in_processes()]

    def products(self):
        """Get the non-empty list of products to record data on"""
        return self._products

    def products_enum(self):
        """Get the non-empty list of flags representing products to record data on"""
        return [utils.product_name_to_enum(p) for p in self.products()]

    def operating_systems(self):
        """Get the list of operating systems to record data on"""
        return self._operating_systems

    def record_on_os(self, target_os):
        """Check if this probe should be recorded on the passed os."""
        # Named `supported` rather than `os` to avoid shadowing the os module.
        supported = self.operating_systems()
        if "all" in supported:
            return True

        canonical_os = utils.canonical_os(target_os)

        if "unix" in supported and canonical_os in utils.UNIX_LIKE_OS:
            return True

        return canonical_os in supported

    def record_into_store(self):
        """Get the non-empty list of stores to record into"""
        return self._record_into_store

    def ranges(self):
        """Return an array of lower bounds for each bucket in the histogram."""
        bucket_fns = {
            "boolean": linear_buckets,
            "flag": linear_buckets,
            "count": linear_buckets,
            "enumerated": linear_buckets,
            "categorical": linear_buckets,
            "linear": linear_buckets,
            "exponential": exponential_buckets,
        }

        if self._kind not in bucket_fns:
            ParserError(
                'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
            ).handle_later()

        fn = bucket_fns[self._kind]
        return fn(self.low(), self.high(), self.n_buckets())

    def compute_bucket_parameters(self, definition):
        """Derive (low, high, n_buckets) from definition and store them."""
        bucket_fns = {
            "boolean": Histogram.boolean_flag_bucket_parameters,
            "flag": Histogram.boolean_flag_bucket_parameters,
            "count": Histogram.boolean_flag_bucket_parameters,
            "enumerated": Histogram.enumerated_bucket_parameters,
            "categorical": Histogram.categorical_bucket_parameters,
            "linear": Histogram.linear_bucket_parameters,
            "exponential": Histogram.exponential_bucket_parameters,
        }

        if self._kind not in bucket_fns:
            ParserError(
                'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
            ).handle_later()

        fn = bucket_fns[self._kind]
        self.set_bucket_parameters(*fn(definition))

    def verify_attributes(self, name, definition):
        """Run every validity check on definition, reporting via ParserError."""
        general_keys = ALWAYS_ALLOWED_KEYS + ["low", "high", "n_buckets"]

        table = {
            "boolean": ALWAYS_ALLOWED_KEYS,
            "flag": ALWAYS_ALLOWED_KEYS,
            "count": ALWAYS_ALLOWED_KEYS,
            "enumerated": ALWAYS_ALLOWED_KEYS + ["n_values"],
            "categorical": ALWAYS_ALLOWED_KEYS + ["labels", "n_values"],
            "linear": general_keys,
            # A separate copy, so that the append below only affects
            # "exponential" and does not leak into "linear".
            "exponential": list(general_keys),
        }
        # We removed extended_statistics_ok on the client, but the server-side,
        # where _strict_type_checks==False, has to deal with historical data.
        if not self._strict_type_checks:
            table["exponential"].append("extended_statistics_ok")

        kind = definition["kind"]
        if kind not in table:
            ParserError(
                'Unknown kind "%s" for histogram "%s".' % (kind, name)
            ).handle_later()
        # NOTE(review): if handle_later() returns, an unknown kind makes the
        # lookup below raise KeyError - confirm that this is intended.
        allowed_keys = table[kind]

        self.check_name(name)
        self.check_keys(name, definition, allowed_keys)
        self.check_keys_field(name, definition)
        self.check_field_types(name, definition)
        self.check_allowlisted_kind(name, definition)
        self.check_allowlistable_fields(name, definition)
        self.check_expiration(name, definition)
        self.check_label_values(name, definition)
        self.check_record_in_processes(name, definition)
        self.check_products(name, definition)
        self.check_operating_systems(name, definition)
        self.check_record_into_store(name, definition)

    def check_name(self, name):
        """Check that name is an acceptable histogram name."""
        if "#" in name:
            ParserError(
                'Error for histogram name "%s": "#" is not allowed.' % (name)
            ).handle_later()

        # Avoid C++ identifier conflicts between histogram enums and label enum names.
        if name.startswith("LABELS_"):
            ParserError(
                'Error for histogram name "%s": can not start with "LABELS_".' % (name)
            ).handle_later()

        # To make it easier to generate C++ identifiers from this etc., we restrict
        # the histogram names to a strict pattern.
        # We skip this on the server to avoid failures with old Histogram.json revisions.
        if self._strict_type_checks:
            if not re.match(CPP_IDENTIFIER_PATTERN, name, re.IGNORECASE):
                ParserError(
                    'Error for histogram name "%s": name does not conform to "%s"'
                    % (name, CPP_IDENTIFIER_PATTERN)
                ).handle_later()

    def check_expiration(self, name, definition):
        """Validate "expires_in_version" and store its normalized form."""
        field = "expires_in_version"
        expiration = definition.get(field)

        if not expiration:
            return

        # We forbid new probes from using "expires_in_version" : "default" field/value pair.
        # Old ones that use this are added to the allowlist.
        if (
            expiration == "default"
            and allowlists is not None
            and name not in allowlists["expiry_default"]
        ):
            ParserError(
                'New histogram "%s" cannot have "default" %s value.' % (name, field)
            ).handle_later()

        # Historical editions of Histograms.json can have the deprecated
        # expiration format 'N.Na1'. Fortunately, those scripts set
        # self._strict_type_checks to false.
        if (
            expiration != "default"
            and not utils.validate_expiration_version(expiration)
            and self._strict_type_checks
        ):
            ParserError(
                (
                    "Error for histogram {} - invalid {}: {}."
                    "\nSee: {}#expires-in-version"
                ).format(name, field, expiration, HISTOGRAMS_DOC_URL)
            ).handle_later()

        expiration = utils.add_expiration_postfix(expiration)

        definition[field] = expiration

    def check_label_values(self, name, definition):
        """Check length, count and naming pattern of the "labels" list."""
        labels = definition.get("labels")
        if not labels:
            return

        # Build real lists: a filter() iterator would already be exhausted by
        # the emptiness check when it is joined into the error message.
        too_long = [label for label in labels if len(label) > MAX_LABEL_LENGTH]
        if too_long:
            ParserError(
                'Label values for "%s" exceed length limit of %d: %s'
                % (name, MAX_LABEL_LENGTH, ", ".join(too_long))
            ).handle_later()

        if len(labels) > MAX_LABEL_COUNT:
            ParserError(
                'Label count for "%s" exceeds limit of %d' % (name, MAX_LABEL_COUNT)
            ).handle_now()

        # To make it easier to generate C++ identifiers from this etc., we restrict
        # the label values to a strict pattern.
        malformed = [
            label
            for label in labels
            if not re.match(CPP_IDENTIFIER_PATTERN, label, re.IGNORECASE)
        ]
        if malformed:
            ParserError(
                'Label values for %s are not matching pattern "%s": %s'
                % (name, CPP_IDENTIFIER_PATTERN, ", ".join(malformed))
            ).handle_later()

    def check_record_in_processes(self, name, definition):
        """Check that "record_in_processes" is present and names valid processes."""
        if not self._strict_type_checks:
            return

        field = "record_in_processes"
        rip = definition.get(field)

        DOC_URL = HISTOGRAMS_DOC_URL + "#record-in-processes"

        if not rip:
            ParserError(
                'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
            ).handle_later()
            # rip may be None here; there is nothing left to validate.
            return

        for process in rip:
            if not utils.is_valid_process_name(process):
                ParserError(
                    'Histogram "%s" has unknown process "%s" in %s.\n%s'
                    % (name, process, field, DOC_URL)
                ).handle_later()

    def check_products(self, name, definition):
        """Check that "products" is present and only names supported products."""
        if not self._strict_type_checks:
            return

        field = "products"
        products = definition.get(field)

        DOC_URL = HISTOGRAMS_DOC_URL + "#products"

        if not products:
            ParserError(
                'Histogram "%s" must have a "%s" field:\n%s' % (name, field, DOC_URL)
            ).handle_now()
            # products may be None here; there is nothing left to validate.
            return

        for product in products:
            if not utils.is_valid_product(product):
                ParserError(
                    'Histogram "%s" has unknown product "%s" in %s.\n%s'
                    % (name, product, field, DOC_URL)
                ).handle_later()
            if utils.is_geckoview_streaming_product(product):
                kind = definition.get("kind")
                if kind not in GECKOVIEW_STREAMING_SUPPORTED_KINDS:
                    ParserError(
                        (
                            'Histogram "%s" is of kind "%s" which is unsupported for '
                            'product "%s".'
                        )
                        % (name, kind, product)
                    ).handle_later()
                keyed = definition.get("keyed")
                if keyed:
                    ParserError(
                        'Keyed histograms like "%s" are unsupported for product "%s"'
                        % (name, product)
                    ).handle_later()

    def check_operating_systems(self, name, definition):
        """Check that every entry of "operating_systems" is a known OS."""
        if not self._strict_type_checks:
            return

        field = "operating_systems"
        operating_systems = definition.get(field)

        DOC_URL = HISTOGRAMS_DOC_URL + "#operating-systems"

        if not operating_systems:
            # operating_systems is optional
            return

        for operating_system in operating_systems:
            if not utils.is_valid_os(operating_system):
                ParserError(
                    'Histogram "%s" has unknown operating system "%s" in %s.\n%s'
                    % (name, operating_system, field, DOC_URL)
                ).handle_later()

    def check_record_into_store(self, name, definition):
        """Check that "record_into_store", if given, is not empty."""
        if not self._strict_type_checks:
            return

        field = "record_into_store"
        DOC_URL = HISTOGRAMS_DOC_URL + "#record-into-store"

        if field not in definition:
            # record_into_store is optional
            return

        record_into_store = definition.get(field)
        # record_into_store should not be empty
        if not record_into_store:
            ParserError(
                'Histogram "%s" has empty list of stores, which is not allowed.\n%s'
                % (name, DOC_URL)
            ).handle_later()

    def check_keys_field(self, name, definition):
        """Check the "keys" list of a keyed histogram.

        Raises ValueError when "keys" is given for a non-keyed histogram, is
        empty, has too many entries or contains overly long entries.
        """
        keys = definition.get("keys")
        if not self._strict_type_checks or keys is None:
            return

        if not definition.get("keyed", False):
            raise ValueError(
                "'keys' field is not valid for %s; only allowed for keyed histograms."
                % (name)
            )

        if len(keys) == 0:
            raise ValueError("The key list for %s cannot be empty" % (name))

        if len(keys) > MAX_KEY_COUNT:
            raise ValueError(
                "Key count for %s exceeds limit of %d" % (name, MAX_KEY_COUNT)
            )

        # Build a real list: a filter() iterator would already be exhausted by
        # the emptiness check when it is joined into the error message.
        too_long = [key for key in keys if len(key) > MAX_KEY_LENGTH]
        if too_long:
            raise ValueError(
                '"keys" values for %s are exceeding length "%d": %s'
                % (name, MAX_KEY_LENGTH, ", ".join(too_long))
            )

    def check_allowlisted_kind(self, name, definition):
        """Reject new "flag"/"count" histograms that do not target Android."""
        # We don't need to run any of these checks on the server.
        if not self._strict_type_checks or allowlists is None:
            return

        # Disallow "flag" and "count" histograms on desktop, suggest to use
        # scalars instead. Allow using these histograms on Android, as we
        # don't support scalars there yet.
        hist_kind = definition.get("kind")
        android_target = "android" in definition.get("operating_systems", [])

        if (
            not android_target
            and hist_kind in ["flag", "count"]
            and name not in allowlists["kind"]
        ):
            ParserError(
                (
                    'Unsupported kind "%s" for histogram "%s":\n'
                    'New "%s" histograms are not supported on Desktop, you should'
                    " use scalars instead:\n"
                    "%s\n"
                    "Are you trying to add a histogram on Android?"
                    ' Add "operating_systems": ["android"] to your histogram definition.'
                )
                % (hist_kind, name, hist_kind, SCALARS_DOC_URL)
            ).handle_now()

    # Check for the presence of fields that old histograms are allowlisted for.
    def check_allowlistable_fields(self, name, definition):
        """Require "alert_emails" and "bug_numbers" unless name is allowlisted."""
        # Use counters don't have any mechanism to add the fields checked here,
        # so skip the check for them.
        # We also don't need to run any of these checks on the server.
        if self._is_use_counter or not self._strict_type_checks:
            return

        # In the pipeline we don't have allowlists available.
        if allowlists is None:
            return

        for field in ["alert_emails", "bug_numbers"]:
            if field not in definition and name not in allowlists[field]:
                ParserError(
                    'New histogram "%s" must have a "%s" field.' % (name, field)
                ).handle_later()
            if field in definition and name in allowlists[field]:
                msg = (
                    'Histogram "%s" should be removed from the allowlist for "%s" in '
                    "histogram-allowlists.json."
                )
                ParserError(msg % (name, field)).handle_later()

    def check_field_types(self, name, definition):
        """Check the types of the values in definition.

        Without strict type checks, legacy values are first coerced in place:
        numeric fields given as expressions are evaluated and "keyed": "true"
        becomes True.
        """
        # Define expected types for the histogram properties.
        type_checked_fields = {
            "n_buckets": int,
            "n_values": int,
            "low": int,
            "high": int,
            "keyed": bool,
            "expires_in_version": str,
            "kind": str,
            "description": str,
            "releaseChannelCollection": str,
        }

        # For list fields we check the items types.
        type_checked_list_fields = {
            "bug_numbers": int,
            "alert_emails": str,
            "labels": str,
            "record_in_processes": str,
            "keys": str,
            "products": str,
            "operating_systems": str,
            "record_into_store": str,
        }

        # For the server-side, where _strict_type_checks==False, we want to
        # skip the stricter type checks for these fields for dealing with
        # historical data.
        coerce_fields = ["low", "high", "n_values", "n_buckets"]
        if not self._strict_type_checks:
            # This handles some old non-numeric expressions.
            EXPRESSIONS = {
                "JS::GCReason::NUM_TELEMETRY_REASONS": 101,
                "mozilla::StartupTimeline::MAX_EVENT_ID": 12,
            }

            def try_to_coerce_to_number(v):
                if v in EXPRESSIONS:
                    return EXPRESSIONS[v]
                try:
                    # NOTE(review): eval() runs text taken from the histogram
                    # definition; empty globals do not sandbox it.  Only feed
                    # trusted definition files through this code path.
                    return eval(v, {})
                except Exception:
                    return v

            for key in [k for k in coerce_fields if k in definition]:
                definition[key] = try_to_coerce_to_number(definition[key])
            # This handles old "keyed":"true" definitions (bug 1271986).
            if definition.get("keyed", None) == "true":
                definition["keyed"] = True

        def nice_type_name(t):
            if t is str:
                return "string"
            return t.__name__

        for key, key_type in type_checked_fields.items():
            if key not in definition:
                continue
            if not isinstance(definition[key], key_type):
                ParserError(
                    'Value for key "{0}" in histogram "{1}" should be {2}.'.format(
                        key, name, nice_type_name(key_type)
                    )
                ).handle_later()

        # Make sure the max range is lower than or equal to INT_MAX: c_int
        # does not check for overflow, so too large values wrap around.
        # A non-int "high" was already reported above and would make c_int()
        # raise, so it is skipped here.
        if (
            "high" in definition
            and isinstance(definition["high"], int)
            and not c_int(definition["high"]).value > 0
        ):
            ParserError(
                'Value for high in histogram "{0}" should be lower or equal to INT_MAX.'.format(
                    name
                )
            ).handle_later()

        for key, key_type in type_checked_list_fields.items():
            if key not in definition:
                continue
            if not all(isinstance(x, key_type) for x in definition[key]):
                ParserError(
                    'All values for list "{0}" in histogram "{1}" should be of type'
                    " {2}.".format(key, name, nice_type_name(key_type))
                ).handle_later()

    def check_keys(self, name, definition, allowed_keys):
        """Check that definition only uses keys listed in allowed_keys."""
        if not self._strict_type_checks:
            return
        for key in definition.keys():
            if key not in allowed_keys:
                ParserError(
                    'Key "%s" is not allowed for histogram "%s".' % (key, name)
                ).handle_later()

    def set_bucket_parameters(self, low, high, n_buckets):
        """Store the bucket parameters and enforce the bucket count limit."""
        self._low = low
        self._high = high
        self._n_buckets = n_buckets
        max_n_buckets = 101 if self._kind in ["enumerated", "categorical"] else 100
        # The type test comes first: comparing a non-int (e.g. an uncoerced
        # legacy string) against max_n_buckets would raise TypeError.
        if (
            type(self._n_buckets) is int
            and allowlists is not None
            and self._n_buckets > max_n_buckets
        ):
            if self._name not in allowlists["n_buckets"]:
                ParserError(
                    'New histogram "%s" is not permitted to have more than 100 buckets.\n'
                    "Histograms with large numbers of buckets use disproportionately high"
                    " amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)"
                    " if you think an exception ought to be made:\n"
                    "https://wiki.mozilla.org/Modules/Toolkit#Telemetry" % self._name
                ).handle_later()

    @staticmethod
    def boolean_flag_bucket_parameters(definition):
        """Return the fixed (low, high, n_buckets) of boolean/flag/count kinds."""
        return (1, 2, 3)

    @staticmethod
    def linear_bucket_parameters(definition):
        """Return (low, high, n_buckets) of a linear histogram; low defaults to 1."""
        return (definition.get("low", 1), definition["high"], definition["n_buckets"])

    @staticmethod
    def enumerated_bucket_parameters(definition):
        """Return (low, high, n_buckets) derived from "n_values"."""
        n_values = definition["n_values"]
        return (1, n_values, n_values + 1)

    @staticmethod
    def categorical_bucket_parameters(definition):
        """Return (low, high, n_buckets) of a categorical histogram."""
        # Categorical histograms default to 50 buckets to make working with them easier.
        # Otherwise when adding labels later we run into problems with the pipeline not
        # supporting bucket changes.
        # This can be overridden using the n_values field.
        n_values = max(
            len(definition["labels"]),
            definition.get("n_values", 0),
            MIN_CATEGORICAL_BUCKET_COUNT,
        )
        return (1, n_values, n_values + 1)

    @staticmethod
    def exponential_bucket_parameters(definition):
        """Return (low, high, n_buckets) of an exponential histogram; low defaults to 1."""
        return (definition.get("low", 1), definition["high"], definition["n_buckets"])

    def set_nsITelemetry_kind(self):
        """Store the nsITelemetry constant matching the histogram kind."""
        # Pick a Telemetry implementation type.
        types = {
            "boolean": "BOOLEAN",
            "flag": "FLAG",
            "count": "COUNT",
            "enumerated": "LINEAR",
            "categorical": "CATEGORICAL",
            "linear": "LINEAR",
            "exponential": "EXPONENTIAL",
        }

        if self._kind not in types:
            ParserError(
                'Unknown kind "%s" for histogram "%s".' % (self._kind, self._name)
            ).handle_later()

        self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % types[self._kind]

    def set_dataset(self, definition):
        """Store the dataset constant for "releaseChannelCollection" (default opt-in)."""
        datasets = {
            "opt-in": "DATASET_PRERELEASE_CHANNELS",
            "opt-out": "DATASET_ALL_CHANNELS",
        }

        value = definition.get("releaseChannelCollection", "opt-in")
        if value not in datasets:
            ParserError(
                "Unknown value for releaseChannelCollection"
                ' policy for histogram "%s".' % self._name
            ).handle_later()

        self._dataset = "nsITelemetry::" + datasets[value]
|
|
|
|
|
|
# This hook function loads the histograms into an OrderedDict.
def load_histograms_into_dict(ordered_pairs, strict_type_checks):
    """Build an OrderedDict from the (key, value) pairs of a JSON object.

    With strict_type_checks, every repeated key is reported through
    ParserError.handle_later().  In all cases the last value seen for a key
    is the one that is kept.
    """
    histograms = collections.OrderedDict()
    for hist_name, hist_definition in ordered_pairs:
        if strict_type_checks and hist_name in histograms:
            ParserError(
                "Found duplicate key in Histograms file: %s" % hist_name
            ).handle_later()
        histograms[hist_name] = hist_definition
    return histograms
|
|
|
|
|
|
# We support generating histograms from multiple different input files, not
|
|
# just Histograms.json. For each file's basename, we have a specific
|
|
# routine to parse that file, and return a dictionary mapping histogram
|
|
# names to histogram parameters.
|
|
def from_json(filename, strict_type_checks):
    """Parse the JSON file filename into an OrderedDict of definitions.

    JSON objects are built by load_histograms_into_dict, so key order is
    preserved.  Invalid JSON is reported through ParserError.handle_now().
    """
    with open(filename, "r") as json_file:
        try:
            histograms = json.load(
                json_file,
                object_pairs_hook=lambda pairs: load_histograms_into_dict(
                    pairs, strict_type_checks
                ),
            )
        except ValueError as e:
            ParserError(
                "error parsing histograms in %s: %s" % (filename, e)
            ).handle_now()
    return histograms
|
|
|
|
|
|
def from_UseCounters_conf(filename, strict_type_checks):
    """Return the histograms usecounters generates from filename.

    strict_type_checks is accepted for parity with the other parsers and is
    not used.
    """
    return usecounters.generate_histograms(filename)
|
|
|
|
|
|
def from_UseCountersWorker_conf(filename, strict_type_checks):
    """Return the histograms usecounters generates from filename.

    strict_type_checks is accepted for parity with the other parsers and is
    not used.
    """
    # NOTE(review): the second argument presumably selects the worker
    # variant of the use counters - confirm against usecounters.py.
    return usecounters.generate_histograms(filename, True)
|
|
|
|
|
|
def from_nsDeprecatedOperationList(filename, strict_type_checks):
    """Generate use counter histograms from a deprecated operation list.

    Every line of filename starting with DEPRECATED_OPERATION(<op>) yields
    two boolean histograms, one for the "document" context and one for the
    "page" context.  strict_type_checks is not used.
    """
    operation_regex = re.compile("^DEPRECATED_OPERATION\\(([^)]+)\\)")
    histograms = collections.OrderedDict()

    with open(filename, "r") as operation_list:
        for line in operation_list:
            found = operation_regex.search(line)
            if found is None:
                continue

            op = found.group(1)
            for context in ("document", "page"):
                counter_name = "USE_COUNTER2_DEPRECATED_%s_%s" % (op, context.upper())
                histograms[counter_name] = {
                    "expires_in_version": "never",
                    "kind": "boolean",
                    "description": "Whether a %s used %s" % (context, op),
                }

    return histograms
|
|
|
|
|
|
def to_camel_case(property_name):
    """Convert a "-" or "_" separated name to CamelCase.

    Leading and trailing "_" characters, then leading and trailing "-"
    characters, are dropped first.  Afterwards every lower-case letter or
    digit at the start of the name or following a single separator is
    upper-cased and that separator is removed.
    """
    trimmed = property_name.strip("_").strip("-")

    def capitalize(match):
        # Group 1 is the separator (or the start of the string), group 2 the
        # character to upper-case.
        return match.group(2).upper()

    return re.sub("(^|_|-)([a-z0-9])", capitalize, trimmed)
|
|
|
|
|
|
def add_css_property_counters(histograms, property_name):
    """Add the use counter histograms of a CSS property to histograms.

    Two boolean histograms are inserted, first for the "document" context
    and then for the "page" context.
    """
    camel_cased = to_camel_case(property_name)
    for context in ("document", "page"):
        counter_name = "USE_COUNTER2_CSS_PROPERTY_%s_%s" % (
            camel_cased,
            context.upper(),
        )
        histograms[counter_name] = {
            "expires_in_version": "never",
            "kind": "boolean",
            "description": "Whether a %s used the CSS property %s"
            % (context, property_name),
        }
|
|
|
|
|
|
def from_ServoCSSPropList(filename, strict_type_checks):
    """Generate use counter histograms from a Servo CSS property list.

    filename is executed as a Python script; each entry of its `data`
    variable contributes the counters for its `name` attribute.
    strict_type_checks is not used.
    """
    histograms = collections.OrderedDict()
    for css_property in runpy.run_path(filename)["data"]:
        add_css_property_counters(histograms, css_property.name)
    return histograms
|
|
|
|
|
|
def from_counted_unknown_properties(filename, strict_type_checks):
    """Generate use counter histograms for counted unknown CSS properties.

    filename is executed as a Python script and its
    COUNTED_UNKNOWN_PROPERTIES variable is read.  strict_type_checks is not
    used.
    """
    histograms = collections.OrderedDict()
    property_names = runpy.run_path(filename)["COUNTED_UNKNOWN_PROPERTIES"]

    # NOTE(emilio): Unlike ServoCSSProperties, each entry here is just the
    # property name.
    #
    # We use the same naming as CSS properties so that we don't get
    # discontinuity when we implement or prototype them.
    for property_name in property_names:
        add_css_property_counters(histograms, property_name)
    return histograms
|
|
|
|
|
|
# This is only used for probe-scraper.
def from_properties_db(filename, strict_type_checks):
    """Generate use counter histograms from a properties-db.js file.

    Only the lines between the `exports.CSS_PROPERTIES = {` line and the
    next line starting with `};` are considered.  strict_type_checks is not
    used.
    """
    histograms = collections.OrderedDict()
    with open(filename, "r") as db_file:
        lines = iter(db_file)

        # Skip everything up to and including the start of the property table.
        for line in lines:
            if line.startswith("exports.CSS_PROPERTIES = {"):
                break

        for line in lines:
            if line.startswith("};"):
                break

            # NOTE(review): the prefix literal is reproduced as seen in the
            # source; confirm its whitespace against the indentation that
            # properties-db.js uses for property entries.
            if line.startswith(' "'):
                # The property name is the first double-quoted string.
                add_css_property_counters(histograms, line.split('"')[1])
    return histograms
|
|
|
|
|
|
# Each entry takes the basename of an input file and returns the parser
# function to use for it, or None if the entry does not handle that file.
# from_files() tries the entries in order and uses the first match.
FILENAME_PARSERS = [
    (lambda x: from_json if x.endswith(".json") else None),
    (
        lambda x: from_nsDeprecatedOperationList
        if x == "nsDeprecatedOperationList.h"
        else None
    ),
    (lambda x: from_ServoCSSPropList if x == "ServoCSSPropList.py" else None),
    (
        lambda x: from_counted_unknown_properties
        if x == "counted_unknown_properties.py"
        else None
    ),
    (lambda x: from_properties_db if x == "properties-db.js" else None),
]
|
|
|
|
# Similarly to the dance above with buildconfig, usecounters may not be
# available, so handle that gracefully.
try:
    import usecounters

    # The use counter parsers are only registered when the module they
    # depend on could be imported.
    FILENAME_PARSERS.append(
        lambda x: from_UseCounters_conf if x == "UseCounters.conf" else None
    )
    FILENAME_PARSERS.append(
        lambda x: from_UseCountersWorker_conf if x == "UseCountersWorker.conf" else None
    )
except ImportError:
    pass
|
|
|
|
|
|
def from_files(filenames, strict_type_checks=True):
    """Yield a Histogram for every histogram defined in filenames.

    This is a generator: nothing is loaded or checked before the first
    item is requested.  With strict_type_checks the allowlists are loaded
    first.  Each file is parsed by the first matching entry of
    FILENAME_PARSERS; problems are reported through ParserError.
    """
    if strict_type_checks:
        load_allowlist()

    all_histograms = OrderedDict()
    for filename in filenames:
        basename = os.path.basename(filename)
        candidates = (pick_parser(basename) for pick_parser in FILENAME_PARSERS)
        parser = next((found for found in candidates if found is not None), None)

        if parser is None:
            ParserError("Don't know how to parse %s." % filename).handle_now()

        histograms = parser(filename, strict_type_checks)

        # OrderedDicts are important, because then the iteration order over
        # the parsed histograms is stable, which makes the insertion into
        # all_histograms stable, which makes ordering in generated files
        # stable, which makes builds more deterministic.
        if not isinstance(histograms, OrderedDict):
            ParserError("Histogram parser did not provide an OrderedDict.").handle_now()

        for name, definition in histograms.items():
            if name in all_histograms:
                ParserError('Duplicate histogram name "%s".' % name).handle_later()
            all_histograms[name] = definition

    def require_contiguous(names, belongs_to_group, group_label):
        # The group is contiguous when the distance between its first and
        # last position matches the number of its members.
        positions = [
            position for position, name in enumerate(names) if belongs_to_group(name)
        ]
        if positions and positions[-1] - positions[0] + 1 != len(positions):
            ParserError(
                "Histograms %s must be defined in a contiguous block." % group_label
            ).handle_later()

    # We require that all USE_COUNTER2_*_WORKER histograms be defined in a contiguous
    # block.
    require_contiguous(
        all_histograms,
        lambda name: name.startswith("USE_COUNTER2_") and name.endswith("_WORKER"),
        "use counter worker",
    )
    # And all other USE_COUNTER2_* histograms be defined in a contiguous
    # block.
    require_contiguous(
        all_histograms,
        lambda name: name.startswith("USE_COUNTER2_") and not name.endswith("_WORKER"),
        "use counter",
    )

    # Check that histograms that were removed from Histograms.json etc.
    # are also removed from the allowlists.
    if allowlists is not None:
        allowlisted = set(itertools.chain.from_iterable(allowlists.values()))
        orphaned = allowlisted - set(all_histograms)
        if orphaned:
            msg = (
                "The following entries are orphaned and should be removed from "
                "histogram-allowlists.json:\n%s"
            )
            ParserError(msg % (", ".join(sorted(orphaned)))).handle_later()

    for name, definition in all_histograms.items():
        yield Histogram(name, definition, strict_type_checks=strict_type_checks)
|