D6358.diff

diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -14,6 +14,8 @@
from .interface import ProvenanceInterface
from .model import OriginEntry, RevisionEntry
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
@@ -43,10 +45,7 @@
return (OriginEntry(url, snapshot) for url, snapshot in self.statuses)
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "main"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
def origin_add(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
@@ -61,10 +60,7 @@
provenance.flush()
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "process_revision"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"})
def origin_add_revision(
provenance: ProvenanceInterface,
origin: OriginEntry,
@@ -94,10 +90,7 @@
stack.append(parent)
-@statsd.timed(
- metric="swh_provenance_origin_revision_layer_accesstime_seconds",
- tags={"method": "check_preferred_origin"},
-)
+@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "check_preferred_origin"})
def check_preferred_origin(
provenance: ProvenanceInterface,
origin: OriginEntry,
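
As an aside on the pattern this file converges on: every decorated function in the module now reports under one shared metric name, and the "method" tag distinguishes the call sites. A minimal sketch, assuming the usual "from swh.core.statsd import statsd" import these modules rely on (the sample function is hypothetical; the real origin_add is in the hunk above):

from swh.core.statsd import statsd

ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"

# All functions in the module share this metric name; the "method" tag
# tells the individual call sites apart when querying the metric.
@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
def origin_add_example() -> None:
    ...  # hypothetical body
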
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -12,6 +12,8 @@
from swh.model.model import Sha1Git
from swh.storage import get_storage
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_direct_duration_seconds"
+
class ArchivePostgreSQL:
def __init__(self, conn: psycopg2.extensions.connection) -> None:
@@ -25,10 +27,7 @@
yield from entries
@lru_cache(maxsize=100000)
- @statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "directory_ls"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]:
# TODO: add file size filtering
with self.conn.cursor() as cursor:
@@ -72,8 +71,7 @@
]
@statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "revision_get_parents"},
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
)
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
with self.conn.cursor() as cursor:
@@ -89,10 +87,7 @@
# There should be at most one row anyway
yield from (row[0] for row in cursor)
- @statsd.timed(
- metric="swh_provenance_archive_direct_accesstime_seconds",
- tags={"method": "snapshot_get_heads"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
with self.conn.cursor() as cursor:
cursor.execute(
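
One detail worth noting in the hunk above: _directory_ls stacks lru_cache on top of statsd.timed, so cache hits return before the timing wrapper runs, and the metric only measures actual database queries. A sketch with a hypothetical stand-in function:

from functools import lru_cache

from swh.core.statsd import statsd

ARCHIVE_DURATION_METRIC = "swh_provenance_archive_direct_duration_seconds"

# lru_cache is the outermost decorator, so a cache hit short-circuits the
# call and no timing sample is emitted; only misses (real work) are timed.
@lru_cache(maxsize=100000)
@statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def directory_ls_example(key: bytes) -> int:
    return len(key)  # hypothetical stand-in for the real database query
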
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -15,6 +15,8 @@
from .interface import ProvenanceInterface
from .model import DirectoryEntry, RevisionEntry
+REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+
class CSVRevisionIterator:
"""Iterator over revisions typically present in the given CSV file.
@@ -49,10 +51,7 @@
yield RevisionEntry(id, date=date, root=root)
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "main"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
def revision_add(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
@@ -88,10 +87,7 @@
provenance.flush()
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "process_content"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"})
def revision_process_content(
archive: ArchiveInterface,
provenance: ProvenanceInterface,
@@ -157,10 +153,7 @@
stack.append(child)
-@statsd.timed(
- metric="swh_provenance_revision_content_layer_accesstime_seconds",
- tags={"method": "flatten_directory"},
-)
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"})
def flatten_directory(
archive: ArchiveInterface,
provenance: ProvenanceInterface,
@@ -191,12 +184,12 @@
assert node.maxdate is not None # for mypy
assert revision.date is not None # idem
if trackall:
- # The only real condition for a directory to be a frontier is that its
- # content is already known and its maxdate is less (or equal) than
- # current revision's date. Checking mindepth is meant to skip root
- # directories (or any arbitrary depth) to improve the result. The
- # option lower tries to maximize the reusage rate of previously defined
- # frontiers by keeping them low in the directory tree.
+ # The only real condition for a directory to be a frontier is that its content
+ # is already known and its maxdate is less than (or equal to) the current
+ # revision's date. Checking mindepth is meant to skip root directories (or any
+ # arbitrary depth) to improve the result. The option lower tries to maximize
+ # the reuse rate of previously defined frontiers by keeping them low in the
+ # directory tree.
return (
node.known
and node.maxdate <= revision.date # all content is earlier than revision
@@ -207,7 +200,7 @@
else:
# If we are only tracking first occurrences, we want to ensure that all first
# occurrences end up in the content_early_in_rev relation. Thus, we force for
- # every blob outside a frontier to have an extrictly earlier date.
+ # every blob outside a frontier to have a strictly earlier date.
return (
node.maxdate < revision.date # all content is earlier than revision
and node.depth >= mindepth # deeper than the min allowed depth
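
Restating the frontier logic spelled out in the comments above as a condensed predicate (attribute names mirror the diff; clauses cut off by the hunk boundaries, such as the lower option, are omitted, so treat this as a partial sketch):

from datetime import datetime

def is_frontier_sketch(
    node, revision_date: datetime, trackall: bool, mindepth: int
) -> bool:
    if trackall:
        # Content fully known and no later than the revision, below the
        # minimum depth used to skip root directories.
        return node.known and node.maxdate <= revision_date and node.depth >= mindepth
    # First-occurrences-only mode: require a strictly earlier maxdate so
    # every first occurrence ends up in content_early_in_rev.
    return node.maxdate < revision_date and node.depth >= mindepth
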
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -10,15 +10,14 @@
from swh.model.model import ObjectType, Sha1Git, TargetType
from swh.storage.interface import StorageInterface
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_api_duration_seconds"
+
class ArchiveStorage:
def __init__(self, storage: StorageInterface) -> None:
self.storage = storage
- @statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "directory_ls"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def directory_ls(self, id: Sha1Git) -> Iterable[Dict[str, Any]]:
# TODO: add file size filtering
for entry in self.storage.directory_ls(id):
@@ -29,18 +28,14 @@
}
@statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "revision_get_parents"},
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
)
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
rev = self.storage.revision_get([id])[0]
if rev is not None:
yield from rev.parents
- @statsd.timed(
- metric="swh_provenance_archive_api_accesstime_seconds",
- tags={"method": "snapshot_get_heads"},
- )
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
from swh.core.utils import grouper
from swh.storage.algos.snapshot import snapshot_get_all_branches
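
Taken together, the diff replaces the old per-call-site *_accesstime_seconds names with four per-layer duration constants. A purely illustrative summary (metric names copied verbatim from the hunks; each constant actually lives in its own module):

# Illustrative mapping only, not part of the diff.
DURATION_METRICS = {
    "origin-revision layer": "swh_provenance_origin_revision_layer_duration_seconds",
    "revision-content layer": "swh_provenance_revision_content_layer_duration_seconds",
    "direct PostgreSQL archive": "swh_provenance_archive_direct_duration_seconds",
    "archive via storage API": "swh_provenance_archive_api_duration_seconds",
}
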
