Page MenuHomeSoftware Heritage

D8251.id29804.diff
No OneTemporary

D8251.id29804.diff

diff --git a/swh/scrubber/storage_checker.py b/swh/scrubber/storage_checker.py
--- a/swh/scrubber/storage_checker.py
+++ b/swh/scrubber/storage_checker.py
@@ -11,6 +11,7 @@
import logging
from typing import Iterable, Union
+from swh.core.statsd import statsd
from swh.journal.serializers import value_to_kafka
from swh.model import swhids
from swh.model.model import (
@@ -91,7 +92,7 @@
for range_start, range_end in backfill.RANGE_GENERATORS[self.object_type](
self.start_object, self.end_object
):
- logger.info(
+ logger.debug(
"Processing %s range %s to %s",
self.object_type,
backfill._format_range_bound(range_start),
@@ -103,22 +104,38 @@
)
objects = list(objects)
- self.check_object_hashes(objects)
- self.check_object_references(objects)
+ with statsd.timed(
+ "swh_scrubber_batch_duration_seconds",
+ tags={"object_type": self.object_type},
+ ):
+ self.check_object_hashes(objects)
+ self.check_object_references(objects)
def check_object_hashes(self, objects: Iterable[ScrubbableObject]):
"""Recomputes hashes, and reports mismatches."""
+ count = 0
for object_ in objects:
if isinstance(object_, Content):
# TODO
continue
real_id = object_.compute_hash()
+ count += 1
if object_.id != real_id:
+ statsd.increment(
+ "swh_scrubber_hash_mismatch_total",
+ tags={"object_type": self.object_type},
+ )
self.db.corrupt_object_add(
object_.swhid(),
self.datastore_info(),
value_to_kafka(object_.to_dict()),
)
+ if count:
+ statsd.increment(
+ "swh_scrubber_objects_hashed_total",
+ count,
+ tags={"object_type": self.object_type},
+ )
def check_object_references(self, objects: Iterable[ScrubbableObject]):
"""Check all objects references by these objects exist."""

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:31 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226400

Event Timeline