diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -467,17 +467,30 @@ ), "lister_instance_name is None, but lister_name is not" metadata = [] - for cls in get_fetchers_for_lister(self.lister_name): + + fetcher_classes = get_fetchers_for_lister(self.lister_name) + + self.statsd_average("metadata_fetchers", len(fetcher_classes)) + + for cls in fetcher_classes: metadata_fetcher = cls( origin=self.origin, lister_name=self.lister_name, lister_instance_name=self.lister_instance_name, credentials=self.metadata_fetcher_credentials, ) - with self.statsd_timed("fetch_one_metadata"): + with self.statsd_timed( + "fetch_one_metadata", tags={"fetcher": cls.FETCHER_NAME} + ): metadata.extend(metadata_fetcher.get_origin_metadata()) if self.parent_origins is None: self.parent_origins = metadata_fetcher.get_parent_origins() + self.statsd_average( + "metadata_parent_origins", + len(self.parent_origins), + tags={"fetcher": cls.FETCHER_NAME}, + ) + self.statsd_average("metadata_objects", len(metadata)) return metadata @@ -496,6 +509,17 @@ tags={"visit_type": self.visit_type, "operation": name, **tags}, ) + def statsd_average(self, name, value, tags={}): + statsd.increment( + f"{STATSD_PREFIX}_{name}_sum", + value, + tags={"visit_type": self.visit_type, **tags}, + ) + statsd.increment( + f"{STATSD_PREFIX}_{name}_count", + tags={"visit_type": self.visit_type, **tags}, + ) + class DVCSLoader(BaseLoader): """This base class is a pattern for dvcs loaders (e.g. git, mercurial). diff --git a/swh/loader/core/metadata_fetchers.py b/swh/loader/core/metadata_fetchers.py --- a/swh/loader/core/metadata_fetchers.py +++ b/swh/loader/core/metadata_fetchers.py @@ -20,6 +20,7 @@ to loaders, via setuptools entrypoints.""" SUPPORTED_LISTERS: Set[str] + FETCHER_NAME: str def __init__( self, diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -97,7 +97,8 @@ class DummyMetadataFetcher: - SUPPORTED_LISTERS = {"fake-lister"} + SUPPORTED_LISTERS = {"fake-forge"} + FETCHER_NAME = "fake-forge" def __init__(self, origin, credentials, lister_name, lister_instance_name): pass @@ -110,7 +111,8 @@ class DummyMetadataFetcherWithFork: - SUPPORTED_LISTERS = {"fake-lister"} + SUPPORTED_LISTERS = {"fake-forge"} + FETCHER_NAME = "fake-forge" def __init__(self, origin, credentials, lister_name, lister_instance_name): pass @@ -146,12 +148,14 @@ def test_base_loader_with_known_lister_name(swh_storage, mocker): fetcher_cls = MagicMock(wraps=DummyMetadataFetcher) fetcher_cls.SUPPORTED_LISTERS = DummyMetadataFetcher.SUPPORTED_LISTERS + fetcher_cls.FETCHER_NAME = "fake-forge" mocker.patch( "swh.loader.core.metadata_fetchers._fetchers", return_value=[fetcher_cls] ) + statsd_report = mocker.patch("swh.core.statsd.statsd._report") loader = DummyBaseLoader( - swh_storage, lister_name="fake-lister", lister_instance_name="" + swh_storage, lister_name="fake-forge", lister_instance_name="" ) result = loader.load() assert result == {"status": "eventful"} @@ -160,7 +164,7 @@ fetcher_cls.assert_called_once_with( origin=ORIGIN, credentials={}, - lister_name="fake-lister", + lister_name="fake-forge", lister_instance_name="", ) assert swh_storage.raw_extrinsic_metadata_get( @@ -168,6 +172,27 @@ ).results == [REMD] assert loader.parent_origins == [] + assert [ + call("swh_loader_metadata_fetchers_sum", "c", 1, {"visit_type": "git"}, 1), + call("swh_loader_metadata_fetchers_count", "c", 1, {"visit_type": "git"}, 1), + call( + "swh_loader_metadata_parent_origins_sum", + "c", + 0, + {"fetcher": "fake-forge", "visit_type": "git"}, + 1, + ), + call( + "swh_loader_metadata_parent_origins_count", + "c", + 1, + {"fetcher": "fake-forge", "visit_type": "git"}, + 1, + ), + call("swh_loader_metadata_objects_sum", "c", 1, {"visit_type": "git"}, 1), + call("swh_loader_metadata_objects_count", "c", 1, {"visit_type": "git"}, 1), + ] == [c for c in statsd_report.mock_calls if "_metadata_" in c[1][0]] + def test_base_loader_with_unknown_lister_name(swh_storage, mocker): fetcher_cls = MagicMock(wraps=DummyMetadataFetcher) @@ -190,12 +215,14 @@ def test_base_loader_forked_origin(swh_storage, mocker): fetcher_cls = MagicMock(wraps=DummyMetadataFetcherWithFork) fetcher_cls.SUPPORTED_LISTERS = DummyMetadataFetcherWithFork.SUPPORTED_LISTERS + fetcher_cls.FETCHER_NAME = "fake-forge" mocker.patch( "swh.loader.core.metadata_fetchers._fetchers", return_value=[fetcher_cls] ) + statsd_report = mocker.patch("swh.core.statsd.statsd._report") loader = DummyBaseLoader( - swh_storage, lister_name="fake-lister", lister_instance_name="" + swh_storage, lister_name="fake-forge", lister_instance_name="" ) result = loader.load() assert result == {"status": "eventful"} @@ -204,7 +231,7 @@ fetcher_cls.assert_called_once_with( origin=ORIGIN, credentials={}, - lister_name="fake-lister", + lister_name="fake-forge", lister_instance_name="", ) assert swh_storage.raw_extrinsic_metadata_get( @@ -212,6 +239,27 @@ ).results == [REMD] assert loader.parent_origins == [PARENT_ORIGIN] + assert [ + call("swh_loader_metadata_fetchers_sum", "c", 1, {"visit_type": "git"}, 1), + call("swh_loader_metadata_fetchers_count", "c", 1, {"visit_type": "git"}, 1), + call( + "swh_loader_metadata_parent_origins_sum", + "c", + 1, + {"fetcher": "fake-forge", "visit_type": "git"}, + 1, + ), + call( + "swh_loader_metadata_parent_origins_count", + "c", + 1, + {"fetcher": "fake-forge", "visit_type": "git"}, + 1, + ), + call("swh_loader_metadata_objects_sum", "c", 1, {"visit_type": "git"}, 1), + call("swh_loader_metadata_objects_count", "c", 1, {"visit_type": "git"}, 1), + ] == [c for c in statsd_report.mock_calls if "_metadata_" in c[1][0]] + def test_dvcs_loader(swh_storage): loader = DummyDVCSLoader(swh_storage)