diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -14,6 +14,8 @@
 from .interface import ProvenanceInterface
 from .model import OriginEntry, RevisionEntry
 
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+
 
 class CSVOriginIterator:
     """Iterator over origin visit statuses typically present in the given CSV
@@ -43,10 +45,7 @@
         return (OriginEntry(url, snapshot) for url, snapshot in self.statuses)
 
 
-@statsd.timed(
-    metric="swh_provenance_origin_revision_layer_accesstime_seconds",
-    tags={"method": "main"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
 def origin_add(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
@@ -61,10 +60,7 @@
     provenance.flush()
 
 
-@statsd.timed(
-    metric="swh_provenance_origin_revision_layer_accesstime_seconds",
-    tags={"method": "process_revision"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"})
 def origin_add_revision(
     provenance: ProvenanceInterface,
     origin: OriginEntry,
@@ -94,10 +90,7 @@
             stack.append(parent)
 
 
-@statsd.timed(
-    metric="swh_provenance_origin_revision_layer_accesstime_seconds",
-    tags={"method": "check_preferred_origin"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "check_preferred_origin"})
 def check_preferred_origin(
     provenance: ProvenanceInterface,
     origin: OriginEntry,
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -12,6 +12,8 @@
 from swh.model.model import Sha1Git
 from swh.storage import get_storage
 
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_direct_duration_seconds"
+
 
 class ArchivePostgreSQL:
     def __init__(self, conn: psycopg2.extensions.connection) -> None:
@@ -25,10 +27,7 @@
         yield from entries
 
     @lru_cache(maxsize=100000)
-    @statsd.timed(
-        metric="swh_provenance_archive_direct_accesstime_seconds",
-        tags={"method": "directory_ls"},
-    )
+    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
    def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]:
         # TODO: add file size filtering
         with self.conn.cursor() as cursor:
@@ -72,8 +71,7 @@
         ]
 
     @statsd.timed(
-        metric="swh_provenance_archive_direct_accesstime_seconds",
-        tags={"method": "revision_get_parents"},
+        metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
     )
     def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
         with self.conn.cursor() as cursor:
@@ -89,10 +87,7 @@
             # There should be at most one row anyway
             yield from (row[0] for row in cursor)
 
-    @statsd.timed(
-        metric="swh_provenance_archive_direct_accesstime_seconds",
-        tags={"method": "snapshot_get_heads"},
-    )
+    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
     def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
         with self.conn.cursor() as cursor:
             cursor.execute(
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -15,6 +15,8 @@
 from .interface import ProvenanceInterface
 from .model import DirectoryEntry, RevisionEntry
 
+REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+
 
 class CSVRevisionIterator:
     """Iterator over revisions typically present in the given CSV file.
@@ -49,10 +51,7 @@
             yield RevisionEntry(id, date=date, root=root)
 
 
-@statsd.timed(
-    metric="swh_provenance_revision_content_layer_accesstime_seconds",
-    tags={"method": "main"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
 def revision_add(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
@@ -88,10 +87,7 @@
     provenance.flush()
 
 
-@statsd.timed(
-    metric="swh_provenance_revision_content_layer_accesstime_seconds",
-    tags={"method": "process_content"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"})
 def revision_process_content(
     archive: ArchiveInterface,
     provenance: ProvenanceInterface,
@@ -157,10 +153,7 @@
             stack.append(child)
 
 
-@statsd.timed(
-    metric="swh_provenance_revision_content_layer_accesstime_seconds",
-    tags={"method": "flatten_directory"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"})
 def flatten_directory(
     archive: ArchiveInterface,
     provenance: ProvenanceInterface,
@@ -191,12 +184,12 @@
     assert node.maxdate is not None  # for mypy
     assert revision.date is not None  # idem
     if trackall:
-        # The only real condition for a directory to be a frontier is that its
-        # content is already known and its maxdate is less (or equal) than
-        # current revision's date. Checking mindepth is meant to skip root
-        # directories (or any arbitrary depth) to improve the result. The
-        # option lower tries to maximize the reusage rate of previously defined
-        # frontiers by keeping them low in the directory tree.
+        # The only real condition for a directory to be a frontier is that its
+        # content is already known and its maxdate is less than (or equal to) the
+        # current revision's date. Checking mindepth is meant to skip root
+        # directories (or any arbitrary depth) to improve the result. The option
+        # lower tries to maximize the reuse rate of previously defined frontiers
+        # by keeping them low in the directory tree.
         return (
             node.known
             and node.maxdate <= revision.date  # all content is earlier than revision
@@ -207,7 +200,7 @@
     else:
         # If we are only tracking first occurrences, we want to ensure that all first
         # occurrences end up in the content_early_in_rev relation. Thus, we force for
-        # every blob outside a frontier to have an extrictly earlier date.
+        # every blob outside a frontier to have a strictly earlier date.
         return (
             node.maxdate < revision.date  # all content is earlier than revision
             and node.depth >= mindepth  # deeper than the min allowed depth
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -10,15 +10,14 @@
 from swh.model.model import ObjectType, Sha1Git, TargetType
 from swh.storage.interface import StorageInterface
 
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_api_duration_seconds"
+
 
 class ArchiveStorage:
     def __init__(self, storage: StorageInterface) -> None:
         self.storage = storage
 
-    @statsd.timed(
-        metric="swh_provenance_archive_api_accesstime_seconds",
-        tags={"method": "directory_ls"},
-    )
+    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
     def directory_ls(self, id: Sha1Git) -> Iterable[Dict[str, Any]]:
         # TODO: add file size filtering
         for entry in self.storage.directory_ls(id):
@@ -29,18 +28,14 @@
             }
 
     @statsd.timed(
-        metric="swh_provenance_archive_api_accesstime_seconds",
-        tags={"method": "revision_get_parents"},
+        metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
     )
     def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
         rev = self.storage.revision_get([id])[0]
         if rev is not None:
             yield from rev.parents
 
-    @statsd.timed(
-        metric="swh_provenance_archive_api_accesstime_seconds",
-        tags={"method": "snapshot_get_heads"},
-    )
+    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
     def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
         from swh.core.utils import grouper
         from swh.storage.algos.snapshot import snapshot_get_all_branches
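
Note for reviewers: the sketch below is a minimal, self-contained illustration of the pattern this patch applies across all four files, i.e. one module-level constant per layer for the metric name, with the per-function breakdown carried by the "method" tag. It assumes the `from swh.core.statsd import statsd` import already used by these modules; the metric name and the decorated function here are hypothetical placeholders, not part of the patch.

    from swh.core.statsd import statsd

    # Hypothetical metric name, following the naming convention of this patch.
    EXAMPLE_DURATION_METRIC = "swh_provenance_example_layer_duration_seconds"


    @statsd.timed(metric=EXAMPLE_DURATION_METRIC, tags={"method": "main"})
    def example_main() -> None:
        # The decorator times each call and emits EXAMPLE_DURATION_METRIC
        # tagged with method=main, so a single metric covers the whole layer.
        ...

Keeping one metric per layer rather than one per function lets dashboards aggregate or facet on the "method" tag instead of juggling many metric names, and makes a later rename a one-line change.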