D6358: Make old StatsD metrics style compliant with the rest of the module
D6358.diff
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -14,6 +14,8 @@
from .interface import ProvenanceInterface
from .model import OriginEntry, RevisionEntry
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
@@ -43,10 +45,7 @@
return (OriginEntry(url, snapshot) for url, snapshot in self.statuses)
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "main"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
def origin_add(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
@@ -61,10 +60,7 @@
provenance.flush()
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "process_revision"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"})
def origin_add_revision(
provenance: ProvenanceInterface,
origin: OriginEntry,
@@ -94,10 +90,7 @@
stack.append(parent)
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "check_preferred_origin"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "check_preferred_origin"})
def check_preferred_origin(
provenance: ProvenanceInterface,
origin: OriginEntry,
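
The same refactoring recurs in every file below: the repeated metric-name string is hoisted into a module-level constant, and the per-function distinction lives entirely in the "method" tag. Condensed into a standalone sketch (the example_* names are illustrative, and the statsd client is assumed to be the module-level instance from swh.core.statsd, whose keyword arguments match the decorator calls in this diff):

from swh.core.statsd import statsd

EXAMPLE_DURATION_METRIC = "swh_provenance_example_layer_duration_seconds"


@statsd.timed(metric=EXAMPLE_DURATION_METRIC, tags={"method": "main"})
def example_add(items) -> None:
    # Timed end to end under the shared metric, tagged method="main".
    for item in items:
        example_process(item)


@statsd.timed(metric=EXAMPLE_DURATION_METRIC, tags={"method": "process"})
def example_process(item) -> None:
    # Each helper reports under the same metric with its own method tag,
    # so one dashboard query can break the layer down by method.
    ...
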
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -12,6 +12,8 @@
from swh.model.model import Sha1Git
from swh.storage import get_storage
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_direct_duration_seconds"
+
class ArchivePostgreSQL:
def __init__(self, conn: psycopg2.extensions.connection) -> None:
@@ -25,10 +27,7 @@
yield from entries
@lru_cache(maxsize=100000)
- @statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "directory_ls"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]:
# TODO: add file size filtering
with self.conn.cursor() as cursor:
@@ -72,8 +71,7 @@
]
@statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "revision_get_parents"},
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
)
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
with self.conn.cursor() as cursor:
@@ -89,10 +87,7 @@
# There should be at most one row anyway
yield from (row[0] for row in cursor)
- @statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "snapshot_get_heads"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
with self.conn.cursor() as cursor:
cursor.execute(
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -15,6 +15,8 @@
from .interface import ProvenanceInterface
from .model import DirectoryEntry, RevisionEntry
+REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+
class CSVRevisionIterator:
"""Iterator over revisions typically present in the given CSV file.
@@ -49,10 +51,7 @@
yield RevisionEntry(id, date=date, root=root)
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "main"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
def revision_add(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
@@ -88,10 +87,7 @@
provenance.flush()
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "process_content"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"})
def revision_process_content(
archive: ArchiveInterface,
provenance: ProvenanceInterface,
@@ -157,10 +153,7 @@
stack.append(child)
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "flatten_directory"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"})
def flatten_directory(
archive: ArchiveInterface,
provenance: ProvenanceInterface,
@@ -191,12 +184,12 @@
assert node.maxdate is not None # for mypy
assert revision.date is not None # idem
if trackall:
- # The only real condition for a directory to be a frontier is that its
- # content is already known and its maxdate is less (or equal) than
- # current revision's date. Checking mindepth is meant to skip root
- # directories (or any arbitrary depth) to improve the result. The
- # option lower tries to maximize the reusage rate of previously defined
- # frontiers by keeping them low in the directory tree.
+ # The only real condition for a directory to be a frontier is that its content
+ # is already known and its maxdate is less than (or equal to) the current
+ # revision's date. Checking mindepth is meant to skip root directories (or any
+ # arbitrary depth) to improve the result. The option lower tries to maximize
+ # the reuse rate of previously defined frontiers by keeping them low in the
+ # directory tree.
return (
node.known
and node.maxdate <= revision.date # all content is earlier than revision
@@ -207,7 +200,7 @@
else:
# If we are only tracking first occurrences, we want to ensure that all first
# occurrences end up in the content_early_in_rev relation. Thus, we force for
- # every blob outside a frontier to have an extrictly earlier date.
+ # every blob outside a frontier to have a strictly earlier date.
return (
node.maxdate < revision.date # all content is earlier than revision
and node.depth >= mindepth # deeper than the min allowed depth
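
The two return expressions above differ only in the date comparison, as the reworded comments explain. Restated as a self-contained predicate (a sketch condensed from the visible parts of the hunks: node, revision, trackall, and mindepth follow the diff, the function name is illustrative, and any conditions trimmed from the collapsed context are omitted):

def is_frontier(node, revision, trackall: bool, mindepth: int) -> bool:
    if trackall:
        # Known content whose maxdate does not exceed the revision's date
        # may become a frontier, as long as it sits at or below mindepth.
        return (
            node.known
            and node.maxdate <= revision.date
            and node.depth >= mindepth
        )
    # When only first occurrences are tracked, the date must be strictly
    # earlier so that every first occurrence lands in content_early_in_rev.
    return node.maxdate < revision.date and node.depth >= mindepth
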
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -10,15 +10,14 @@
from swh.model.model import ObjectType, Sha1Git, TargetType
from swh.storage.interface import StorageInterface
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_api_duration_seconds"
+
class ArchiveStorage:
def __init__(self, storage: StorageInterface) -> None:
self.storage = storage
- @statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "directory_ls"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def directory_ls(self, id: Sha1Git) -> Iterable[Dict[str, Any]]:
# TODO: add file size filtering
for entry in self.storage.directory_ls(id):
@@ -29,18 +28,14 @@
}
@statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "revision_get_parents"},
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
)
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
rev = self.storage.revision_get([id])[0]
if rev is not None:
yield from rev.parents
- @statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "snapshot_get_heads"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
from swh.core.utils import grouper
from swh.storage.algos.snapshot import snapshot_get_all_branches
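
Taken together, the four metrics renamed here now follow a single convention, swh_provenance_<component>_duration_seconds, with the specific entry point carried in the method tag rather than in the metric name. Hoisting each name into a module-level constant also means a later rename touches one line per file instead of every decorator, and most of the decorator calls now fit on a single line.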