Changeset View
Standalone View
swh/loader/core/loader.py
Show First 20 Lines • Show All 461 Lines • ▼ Show 20 Lines | def build_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadata]: | ||||
self.log.debug("lister_not provided, skipping extrinsic origin metadata") | self.log.debug("lister_not provided, skipping extrinsic origin metadata") | ||||
return [] | return [] | ||||
assert ( | assert ( | ||||
self.lister_instance_name is not None | self.lister_instance_name is not None | ||||
), "lister_instance_name is None, but lister_name is not" | ), "lister_instance_name is None, but lister_name is not" | ||||
metadata = [] | metadata = [] | ||||
for cls in get_fetchers_for_lister(self.lister_name): | |||||
fetcher_classes = get_fetchers_for_lister(self.lister_name) | |||||
statsd.histogram( | |||||
f"{STATSD_PREFIX}.metadata.fetchers", | |||||
len(fetcher_classes), | |||||
tags={"visit_type": self.visit_type}, | |||||
olasd: For all these histograms, we will want to change the configuration of the statsd exporter for… | |||||
) | |||||
for cls in fetcher_classes: | |||||
metadata_fetcher = cls( | metadata_fetcher = cls( | ||||
origin=self.origin, | origin=self.origin, | ||||
lister_name=self.lister_name, | lister_name=self.lister_name, | ||||
lister_instance_name=self.lister_instance_name, | lister_instance_name=self.lister_instance_name, | ||||
credentials=self.metadata_fetcher_credentials, | credentials=self.metadata_fetcher_credentials, | ||||
) | ) | ||||
with self.statsd_timed("fetch_one_metadata"): | with self.statsd_timed( | ||||
"fetch_one_metadata", tags={"fetcher": cls.FETCHER_NAME} | |||||
): | |||||
metadata.extend(metadata_fetcher.get_origin_metadata()) | metadata.extend(metadata_fetcher.get_origin_metadata()) | ||||
if self.parent_origins is None: | if self.parent_origins is None: | ||||
self.parent_origins = metadata_fetcher.get_parent_origins() | self.parent_origins = metadata_fetcher.get_parent_origins() | ||||
statsd.histogram( | |||||
f"{STATSD_PREFIX}.metadata.parent_origins", | |||||
len(self.parent_origins), | |||||
tags={"fetcher": cls.FETCHER_NAME, "visit_type": self.visit_type}, | |||||
) | |||||
statsd.histogram( | |||||
f"{STATSD_PREFIX}.metadata.objects", | |||||
len(metadata), | |||||
tags={"visit_type": self.visit_type}, | |||||
Done Inline ActionsDo we really want a histogram, or just a running total? (note that the histogram will also generate a running total) olasd: Do we really want a histogram, or just a running total? (note that the histogram will also… | |||||
Done Inline ActionsI don't know; I'm only interested in the average, and an histogram seemed like the only way to do that (ditto above) vlorentz: I don't know; I'm only interested in the average, and an histogram seemed like the only way to… | |||||
Done Inline ActionsIf you just want an average, then you just need to have a pair of metrics: one that is incremented for each operation (and will be shared across all metrics), and one that is incremented by the number of objects you want to count the average of. You can then divide these two to get a running average. The prometheus statsd exporter just generates these series automatically for histograms, so that they can be used with the prometheus histogram functions: there's a <foo>_count series for the number of times a value was measured, and a <foo>_sum series for the sum of all <foo> values mesured, in addition to all the "bucket counts" series (which generate one series for the number of counts lower than a given bucket limit). Either way, in terms of storage, as long as we pinpoint a small set of relevant buckets, it shouldn't matter too much. If you think that we're never going to need the fine-grained metrics (I probably agree), please use counters directly. olasd: If you just want an average, then you just need to have a pair of metrics: one that is… | |||||
Done Inline ActionsDone. I kept {name} inside the metric instead of tags; because it doesn't make sense to aggregate them (like in the other diff) IMO vlorentz: Done. I kept `{name}` inside the metric instead of tags; because it doesn't make sense to… | |||||
Not Done Inline ActionsYeah, that's perfect! olasd: Yeah, that's perfect! | |||||
) | |||||
return metadata | return metadata | ||||
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def statsd_timed(self, name, tags={}): | def statsd_timed(self, name, tags={}): | ||||
with statsd.timed( | with statsd.timed( | ||||
f"{STATSD_PREFIX}.operation_duration_seconds", | f"{STATSD_PREFIX}.operation_duration_seconds", | ||||
tags={"visit_type": self.visit_type, "operation": name, **tags}, | tags={"visit_type": self.visit_type, "operation": name, **tags}, | ||||
▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines |
For all these histograms, we will want to change the configuration of the statsd exporter for this metric to generate a histogram with relevant buckets. Our default buckets are only relevant for timings and will be really wasteful for this (which will pretty much always return 0 or 1)
https://forge.softwareheritage.org/source/puppet-swh-site/browse/production/data/common/common.yaml$3124-3151
https://github.com/prometheus/statsd_exporter#statsd-timers-and-distributions
(something like:
(you will want to test that in docker as well)