D6358.diff

diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -14,6 +14,8 @@
from .interface import ProvenanceInterface
from .model import OriginEntry, RevisionEntry
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
@@ -43,10 +45,7 @@
return (OriginEntry(url, snapshot) for url, snapshot in self.statuses)
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "main"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
def origin_add(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
@@ -61,10 +60,7 @@
provenance.flush()
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "process_revision"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"})
def origin_add_revision(
provenance: ProvenanceInterface,
origin: OriginEntry,
@@ -94,10 +90,7 @@
stack.append(parent)
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "check_preferred_origin"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "check_preferred_origin"})
def check_preferred_origin(
provenance: ProvenanceInterface,
origin: OriginEntry,
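
As an aside on the pattern this file converges on: every decorated function in the module now reports under one shared metric name, and the "method" tag distinguishes the call sites. A minimal sketch, assuming the usual "from swh.core.statsd import statsd" import these modules rely on (the sample function is hypothetical; the real origin_add is in the hunk above):

from swh.core.statsd import statsd

ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"

# All functions in the module share this metric name; the "method" tag
# tells the individual call sites apart when querying the metric.
@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
def origin_add_example() -> None:
    ...  # hypothetical body
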
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -12,6 +12,8 @@
from swh.model.model import Sha1Git
from swh.storage import get_storage
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_direct_duration_seconds"
+
class ArchivePostgreSQL:
def __init__(self, conn: psycopg2.extensions.connection) -> None:
@@ -25,10 +27,7 @@
yield from entries
@lru_cache(maxsize=100000)
- @statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "directory_ls"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]:
# TODO: add file size filtering
with self.conn.cursor() as cursor:
@@ -72,8 +71,7 @@
]
@statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "revision_get_parents"},
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
)
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
with self.conn.cursor() as cursor:
@@ -89,10 +87,7 @@
# There should be at most one row anyway
yield from (row[0] for row in cursor)
- @statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "snapshot_get_heads"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
with self.conn.cursor() as cursor:
cursor.execute(
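
One detail worth noting in the hunk above: _directory_ls stacks lru_cache on top of statsd.timed, so cache hits return before the timing wrapper runs, and the metric only measures actual database queries. A sketch with a hypothetical stand-in function:

from functools import lru_cache

from swh.core.statsd import statsd

ARCHIVE_DURATION_METRIC = "swh_provenance_archive_direct_duration_seconds"

# lru_cache is the outermost decorator, so a cache hit short-circuits the
# call and no timing sample is emitted; only misses (real work) are timed.
@lru_cache(maxsize=100000)
@statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def directory_ls_example(key: bytes) -> int:
    return len(key)  # hypothetical stand-in for the real database query
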
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -15,6 +15,8 @@
from .interface import ProvenanceInterface
from .model import DirectoryEntry, RevisionEntry
+REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+
class CSVRevisionIterator:
"""Iterator over revisions typically present in the given CSV file.
@@ -49,10 +51,7 @@
yield RevisionEntry(id, date=date, root=root)
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "main"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
def revision_add(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
@@ -88,10 +87,7 @@
provenance.flush()
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "process_content"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"})
def revision_process_content(
archive: ArchiveInterface,
provenance: ProvenanceInterface,
@@ -157,10 +153,7 @@
stack.append(child)
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "flatten_directory"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"})
def flatten_directory(
archive: ArchiveInterface,
provenance: ProvenanceInterface,
@@ -191,12 +184,12 @@
assert node.maxdate is not None # for mypy
assert revision.date is not None # idem
if trackall:
- # The only real condition for a directory to be a frontier is that its
- # content is already known and its maxdate is less (or equal) than
- # current revision's date. Checking mindepth is meant to skip root
- # directories (or any arbitrary depth) to improve the result. The
- # option lower tries to maximize the reusage rate of previously defined
- # frontiers by keeping them low in the directory tree.
+ # The only real condition for a directory to be a frontier is that its content
+ # is already known and its maxdate is less than (or equal to) the current
+ # revision's date. Checking mindepth is meant to skip root directories (or any
+ # arbitrary depth) to improve the result. The option lower tries to maximize
+ # the reuse rate of previously defined frontiers by keeping them low in the
+ # directory tree.
return (
node.known
and node.maxdate <= revision.date # all content is earlier than revision
@@ -207,7 +200,7 @@
else:
# If we are only tracking first occurrences, we want to ensure that all first
# occurrences end up in the content_early_in_rev relation. Thus, we force for
- # every blob outside a frontier to have an extrictly earlier date.
+ # every blob outside a frontier to have a strictly earlier date.
return (
node.maxdate < revision.date # all content is earlier than revision
and node.depth >= mindepth # deeper than the min allowed depth
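
Restating the frontier logic spelled out in the comments above as a condensed predicate (attribute names mirror the diff; clauses cut off by the hunk boundaries, such as the lower option, are omitted, so treat this as a partial sketch):

from datetime import datetime

def is_frontier_sketch(
    node, revision_date: datetime, trackall: bool, mindepth: int
) -> bool:
    if trackall:
        # Content fully known and no later than the revision, below the
        # minimum depth used to skip root directories.
        return node.known and node.maxdate <= revision_date and node.depth >= mindepth
    # First-occurrences-only mode: require a strictly earlier maxdate so
    # every first occurrence ends up in content_early_in_rev.
    return node.maxdate < revision_date and node.depth >= mindepth
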
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -10,15 +10,14 @@
from swh.model.model import ObjectType, Sha1Git, TargetType
from swh.storage.interface import StorageInterface
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_api_duration_seconds"
+
class ArchiveStorage:
def __init__(self, storage: StorageInterface) -> None:
self.storage = storage
- @statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "directory_ls"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def directory_ls(self, id: Sha1Git) -> Iterable[Dict[str, Any]]:
# TODO: add file size filtering
for entry in self.storage.directory_ls(id):
@@ -29,18 +28,14 @@
}
@statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "revision_get_parents"},
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
)
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
rev = self.storage.revision_get([id])[0]
if rev is not None:
yield from rev.parents
- @statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "snapshot_get_heads"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
from swh.core.utils import grouper
from swh.storage.algos.snapshot import snapshot_get_all_branches
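
Taken together, the diff replaces the old per-call-site *_accesstime_seconds names with four per-layer duration constants. A purely illustrative summary (metric names copied verbatim from the hunks; each constant actually lives in its own module):

# Illustrative mapping only, not part of the diff.
DURATION_METRICS = {
    "origin-revision layer": "swh_provenance_origin_revision_layer_duration_seconds",
    "revision-content layer": "swh_provenance_revision_content_layer_duration_seconds",
    "direct PostgreSQL archive": "swh_provenance_archive_direct_duration_seconds",
    "archive via storage API": "swh_provenance_archive_api_duration_seconds",
}
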
