F7123106: D7726.id27949.diff (7 KB)
diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -3,15 +3,18 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import contextlib
import datetime
import hashlib
import logging
import os
+import time
from typing import Any, Dict, Iterable, List, Optional
import sentry_sdk
from swh.core.config import load_from_envvar
+from swh.core.statsd import statsd
from swh.loader.core.metadata_fetchers import CredentialsType, get_fetchers_for_lister
from swh.loader.exception import NotFound
from swh.model.model import (
@@ -36,6 +39,8 @@
"max_content_size": 100 * 1024 * 1024,
}
+STATSD_PREFIX = "swh.loader.core"
+
class BaseLoader:
"""Base class for (D)VCS loaders (e.g Svn, Git, Mercurial, ...) or PackageLoader (e.g
@@ -317,7 +322,8 @@
"""
try:
- self.pre_cleanup()
+ with self.statsd_timed("pre_cleanup"):
+ self.pre_cleanup()
except Exception:
msg = "Cleaning up dangling data failed! Continue loading."
self.log.warning(msg)
@@ -333,7 +339,8 @@
)
try:
- metadata = self.build_extrinsic_origin_metadata()
+ with self.statsd_timed("build_extrinsic_origin_metadata"):
+ metadata = self.build_extrinsic_origin_metadata()
self.load_metadata_objects(metadata)
except Exception as e:
sentry_sdk.capture_exception(e)
@@ -350,26 +357,44 @@
},
)
+ total_time_fetch_data = 0.0
+ total_time_store_data = 0.0
+
try:
- self.prepare()
+ with self.statsd_timed("prepare"):
+ self.prepare()
while True:
+ t1 = time.monotonic()
more_data_to_fetch = self.fetch_data()
+ t2 = time.monotonic()
+ total_time_fetch_data += t2 - t1
self.store_data()
+ t3 = time.monotonic()
+ total_time_store_data += t3 - t2
if not more_data_to_fetch:
break
+ self.statsd_timing("fetch_data", total_time_fetch_data * 1000.0)
+ self.statsd_timing("store_data", total_time_store_data * 1000.0)
+
+ status = self.visit_status()
visit_status = OriginVisitStatus(
origin=self.origin.url,
visit=self.visit.visit,
type=self.visit_type,
date=now(),
- status=self.visit_status(),
+ status=status,
snapshot=self.loaded_snapshot_id,
)
self.storage.origin_visit_status_add([visit_status])
- self.post_load()
+ success = True
+ with self.statsd_timed(
+ "post_load", tags={"success": success, "status": status}
+ ):
+ self.post_load()
except Exception as e:
+ success = False
if isinstance(e, NotFound):
status = "not_found"
task_status = "uneventful"
@@ -399,11 +424,20 @@
snapshot=self.loaded_snapshot_id,
)
self.storage.origin_visit_status_add([visit_status])
- self.post_load(success=False)
+ with self.statsd_timed(
+ "post_load", tags={"success": success, "status": status}
+ ):
+ self.post_load(success=success)
return {"status": task_status}
finally:
- self.flush()
- self.cleanup()
+ with self.statsd_timed(
+ "flush", tags={"success": success, "status": status}
+ ):
+ self.flush()
+ with self.statsd_timed(
+ "cleanup", tags={"success": success, "status": status}
+ ):
+ self.cleanup()
return self.load_status()
@@ -440,12 +474,28 @@
lister_instance_name=self.lister_instance_name,
credentials=self.metadata_fetcher_credentials,
)
- metadata.extend(metadata_fetcher.get_origin_metadata())
+ with self.statsd_timed("fetch_one_metadata"):
+ metadata.extend(metadata_fetcher.get_origin_metadata())
if self.parent_origins is None:
self.parent_origins = metadata_fetcher.get_parent_origins()
return metadata
+ @contextlib.contextmanager
+ def statsd_timed(self, name, tags={}):
+ with statsd.timed(
+ f"{STATSD_PREFIX}.duration_seconds.{name}",
+ tags={"visit_type": self.visit_type, **tags},
+ ):
+ yield
+
+ def statsd_timing(self, name, value, tags={}):
+ statsd.timing(
+ f"{STATSD_PREFIX}.duration_seconds.{name}",
+ value,
+ tags={"visit_type": self.visit_type, **tags},
+ )
+
class DVCSLoader(BaseLoader):
"""This base class is a pattern for dvcs loaders (e.g. git, mercurial).
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -6,7 +6,8 @@
import datetime
import hashlib
import logging
-from unittest.mock import MagicMock
+import time
+from unittest.mock import MagicMock, call
import pytest
@@ -277,6 +278,76 @@
assert loader.loaded_snapshot_id is None
+@pytest.mark.parametrize("success", [True, False])
+def test_loader_timings(swh_storage, mocker, success):
+ current_time = time.time()
+ mocker.patch("time.monotonic", side_effect=lambda: current_time)
+ mocker.patch("swh.core.statsd.monotonic", side_effect=lambda: current_time)
+
+ runtimes = {
+ "pre_cleanup": 2.0,
+ "build_extrinsic_origin_metadata": 3.0,
+ "prepare": 5.0,
+ "fetch_data": 7.0,
+ "store_data": 11.0,
+ "post_load": 13.0,
+ "flush": 17.0,
+ "cleanup": 23.0,
+ }
+
+ class TimedLoader(BaseLoader):
+ visit_type = "my-visit-type"
+
+ def __getattribute__(self, method_name):
+ if method_name == "visit_status" and not success:
+
+ def crashy():
+ raise Exception("oh no")
+
+ return crashy
+
+ if method_name not in runtimes:
+ return super().__getattribute__(method_name)
+
+ def meth(*args, **kwargs):
+ nonlocal current_time
+ current_time += runtimes[method_name]
+
+ return meth
+
+ statsd_report = mocker.patch("swh.core.statsd.statsd._report")
+
+ loader = TimedLoader(swh_storage, origin_url="http://example.org/hello.git")
+ loader.load()
+
+ if success:
+ expected_tags = {
+ "post_load": {"success": True, "status": "full"},
+ "flush": {"success": True, "status": "full"},
+ "cleanup": {"success": True, "status": "full"},
+ }
+ else:
+ expected_tags = {
+ "post_load": {"success": False, "status": "failed"},
+ "flush": {"success": False, "status": "failed"},
+ "cleanup": {"success": False, "status": "failed"},
+ }
+
+    # Note that this is a list equality, so the order of entries in 'runtimes'
+    # matters. This is not ideal, but call() objects are not hashable, so
+    # comparing lists directly is the simplest approach.
+ assert statsd_report.mock_calls == [
+ call(
+ f"swh.loader.core.duration_seconds.{key}",
+ "ms",
+ value * 1000,
+ {"visit_type": "my-visit-type", **expected_tags.get(key, {})},
+ 1,
+ )
+ for (key, value) in runtimes.items()
+ ]
+
+
class DummyDVCSLoaderExc(DummyDVCSLoader):
"""A loader which raises an exception when loading some contents"""
Attached To
D7726: loader.core: Add statsd timing metrics