diff --git a/swh/provenance/api/client.py b/swh/provenance/api/client.py --- a/swh/provenance/api/client.py +++ b/swh/provenance/api/client.py @@ -353,7 +353,7 @@ properties: pika.spec.BasicProperties, body: bytes, ) -> None: - LOGGER.info( + LOGGER.debug( "Received message # %s from %s: %s", deliver.delivery_tag, properties.app_id, @@ -365,7 +365,7 @@ decode_data(body, extra_decoders=self.extra_type_decoders), ) ) - LOGGER.info("Acknowledging message %s", deliver.delivery_tag) + LOGGER.debug("Acknowledging message %s", deliver.delivery_tag) channel.basic_ack(delivery_tag=deliver.delivery_tag) def stop_consuming(self) -> None: diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py --- a/swh/provenance/origin.py +++ b/swh/provenance/origin.py @@ -3,6 +3,10 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import logging + +from datetime import datetime + from itertools import islice from typing import Generator, Iterable, Iterator, List, Optional, Tuple @@ -14,8 +18,16 @@ from .interface import ProvenanceInterface from .model import OriginEntry, RevisionEntry + ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds" +LOG_FORMAT = ( + "%(levelname) -10s %(asctime)s %(name) -30s %(funcName) " + "-35s %(lineno) -5d: %(message)s" +) + +LOGGER = logging.getLogger(__name__) + class CSVOriginIterator: """Iterator over origin visit statuses typically present in the given CSV @@ -57,11 +69,13 @@ if commit: provenance.flush() + @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "proceed_origin"}) def proceed_origin( - provenance: ProvenanceInterface, - archive: ArchiveInterface, - origin: OriginEntry) -> None: + provenance: ProvenanceInterface, archive: ArchiveInterface, origin: OriginEntry +) -> None: + LOGGER.info("Processing origin %s", origin.url) + start = datetime.now() provenance.origin_add(origin) origin.retrieve_revisions(archive) for revision in origin.revisions: @@ -71,6 +85,8 @@ # head is treated separately check_preferred_origin(provenance, origin, revision) provenance.revision_add_to_origin(origin, revision) + end = datetime.now() + LOGGER.info("Processed origin %s in %s", origin.url, (end - start)) @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"}) diff --git a/swh/provenance/tools/origins/client.py b/swh/provenance/tools/origins/client.py --- a/swh/provenance/tools/origins/client.py +++ b/swh/provenance/tools/origins/client.py @@ -62,6 +62,9 @@ if __name__ == "__main__": + + logging.basicConfig(level=logging.INFO) + # Check parameters if len(sys.argv) != 2: print("usage: client ")