diff --git a/swh/loader/git/base.py b/swh/loader/git/base.py --- a/swh/loader/git/base.py +++ b/swh/loader/git/base.py @@ -119,5 +119,10 @@ - # cannot use self.statsd_average, because this is a weighted average tags = {"object_type": object_type} - self.statsd.increment("filtered_objects_percent_sum", filtered, tags=tags) - self.statsd.increment("filtered_objects_percent_count", total, tags=tags) + + # unweighted average + self.statsd_average("filtered_objects_percent", filtered / total, tags=tags) + + # average weighted by total + # (cannot use self.statsd_average here, because this is a weighted average) + self.statsd.increment("filtered_objects_total_sum", filtered, tags=tags) + self.statsd.increment("filtered_objects_total_count", total, tags=tags) diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py --- a/swh/loader/git/tests/test_loader.py +++ b/swh/loader/git/tests/test_loader.py @@ -126,17 +126,27 @@ call("git_ignored_refs_percent", "h", 0.0, {}, 1), call("git_known_refs_percent", "h", 0.0, {}, 1), ] - sum_name = "filtered_objects_percent_sum" - count_name = "filtered_objects_percent_count" + total_sum_name = "filtered_objects_total_sum" + total_count_name = "filtered_objects_total_count" + percent_sum_name = "filtered_objects_percent_sum" + percent_count_name = "filtered_objects_percent_count" assert [c for c in statsd_calls if c[1][0].startswith("filtered_")] == [ - call(sum_name, "c", 0, {"object_type": "content"}, 1), - call(count_name, "c", 4, {"object_type": "content"}, 1), - call(sum_name, "c", 0, {"object_type": "directory"}, 1), - call(count_name, "c", 7, {"object_type": "directory"}, 1), - call(sum_name, "c", 0, {"object_type": "revision"}, 1), - call(count_name, "c", 7, {"object_type": "revision"}, 1), - call(sum_name, "c", 0, {"object_type": "snapshot"}, 1), - call(count_name, "c", 1, {"object_type": "snapshot"}, 1), + call(percent_sum_name, "c", 0.0, {"object_type": "content"}, 1), + call(percent_count_name, "c", 1, {"object_type": "content"}, 1), + call(total_sum_name, "c", 0, 
{"object_type": "content"}, 1), + call(total_count_name, "c", 4, {"object_type": "content"}, 1), + call(percent_sum_name, "c", 0.0, {"object_type": "directory"}, 1), + call(percent_count_name, "c", 1, {"object_type": "directory"}, 1), + call(total_sum_name, "c", 0, {"object_type": "directory"}, 1), + call(total_count_name, "c", 7, {"object_type": "directory"}, 1), + call(percent_sum_name, "c", 0.0, {"object_type": "revision"}, 1), + call(percent_count_name, "c", 1, {"object_type": "revision"}, 1), + call(total_sum_name, "c", 0, {"object_type": "revision"}, 1), + call(total_count_name, "c", 7, {"object_type": "revision"}, 1), + call(percent_sum_name, "c", 0.0, {"object_type": "snapshot"}, 1), + call(percent_count_name, "c", 1, {"object_type": "snapshot"}, 1), + call(total_sum_name, "c", 0, {"object_type": "snapshot"}, 1), + call(total_count_name, "c", 1, {"object_type": "snapshot"}, 1), ] assert self.loader.statsd.constant_tags == { "visit_type": "git", @@ -188,17 +198,27 @@ call("git_ignored_refs_percent", "h", 0.0, {}, 1), call("git_known_refs_percent", "h", 0.0, {}, 1), ] - sum_name = "filtered_objects_percent_sum" - count_name = "filtered_objects_percent_count" + total_sum_name = "filtered_objects_total_sum" + total_count_name = "filtered_objects_total_count" + percent_sum_name = "filtered_objects_percent_sum" + percent_count_name = "filtered_objects_percent_count" assert [c for c in statsd_calls if c[1][0].startswith("filtered_")] == [ - call(sum_name, "c", len(known_cnts), {"object_type": "content"}, 1), - call(count_name, "c", 4, {"object_type": "content"}, 1), - call(sum_name, "c", len(known_dirs), {"object_type": "directory"}, 1), - call(count_name, "c", 7, {"object_type": "directory"}, 1), - call(sum_name, "c", len(known_revs), {"object_type": "revision"}, 1), - call(count_name, "c", 7, {"object_type": "revision"}, 1), - call(sum_name, "c", 0, {"object_type": "snapshot"}, 1), - call(count_name, "c", 1, {"object_type": "snapshot"}, 1), + 
call(percent_sum_name, "c", 1 / 4, {"object_type": "content"}, 1), + call(percent_count_name, "c", 1, {"object_type": "content"}, 1), + call(total_sum_name, "c", 1, {"object_type": "content"}, 1), + call(total_count_name, "c", 4, {"object_type": "content"}, 1), + call(percent_sum_name, "c", 3 / 7, {"object_type": "directory"}, 1), + call(percent_count_name, "c", 1, {"object_type": "directory"}, 1), + call(total_sum_name, "c", 3, {"object_type": "directory"}, 1), + call(total_count_name, "c", 7, {"object_type": "directory"}, 1), + call(percent_sum_name, "c", 2 / 7, {"object_type": "revision"}, 1), + call(percent_count_name, "c", 1, {"object_type": "revision"}, 1), + call(total_sum_name, "c", 2, {"object_type": "revision"}, 1), + call(total_count_name, "c", 7, {"object_type": "revision"}, 1), + call(percent_sum_name, "c", 0.0, {"object_type": "snapshot"}, 1), + call(percent_count_name, "c", 1, {"object_type": "snapshot"}, 1), + call(total_sum_name, "c", 0, {"object_type": "snapshot"}, 1), + call(total_count_name, "c", 1, {"object_type": "snapshot"}, 1), ] assert self.loader.statsd.constant_tags == { "visit_type": "git",