diff --git a/swh/provenance/journal_client.py b/swh/provenance/journal_client.py index 470b3dd..8028958 100644 --- a/swh/provenance/journal_client.py +++ b/swh/provenance/journal_client.py @@ -1,62 +1,69 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import datetime + try: from systemd.daemon import notify except ImportError: notify = None from swh.model.model import TimestampWithTimezone from swh.provenance.interface import ProvenanceInterface from swh.provenance.model import OriginEntry, RevisionEntry from swh.provenance.origin import origin_add from swh.provenance.revision import revision_add from swh.storage.interface import StorageInterface +EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + def process_journal_origins( messages, *, provenance: ProvenanceInterface, archive: StorageInterface, **cfg ) -> None: """Worker function for `JournalClient.process(worker_fn)`.""" assert set(messages) == {"origin_visit_status"}, set(messages) origin_entries = [ OriginEntry(url=visit["origin"], snapshot=visit["snapshot"]) for visit in messages["origin_visit_status"] if visit["snapshot"] is not None ] if origin_entries: with provenance: origin_add(provenance, archive, origin_entries, **cfg) if notify: notify("WATCHDOG=1") def process_journal_revisions( messages, *, provenance: ProvenanceInterface, archive: StorageInterface, **cfg ) -> None: """Worker function for `JournalClient.process(worker_fn)`.""" assert set(messages) == {"revision"}, set(messages) revisions = [] for rev in messages["revision"]: if rev["date"] is None: continue try: date = TimestampWithTimezone.from_dict(rev["date"]).to_datetime() except Exception: continue + if date <= EPOCH: + continue + revisions.append( RevisionEntry( id=rev["id"], root=rev["directory"], date=date, parents=rev["parents"], ) ) if revisions: with provenance: revision_add(provenance, archive, revisions, **cfg) if notify: notify("WATCHDOG=1")