def origin_visit_status_add(
    self,
    origin_url: str,
    visit_id: int,
    date: datetime.datetime,
    status: str,
    metadata: Optional[Dict] = None,
    snapshot: Optional[bytes] = None,
) -> OriginVisitStatus:
    """Record a new status for an existing visit of an origin (in-memory backend).

    Args:
        origin_url: URL of the visited origin
        visit_id: identifier of the visit the status belongs to
        date: date of the status; must be a ``datetime.datetime``
        status: visit status string, e.g. "ongoing" or "full"
        metadata: optional data associated with the visit status
        snapshot: optional identifier of the snapshot to attach to the visit

    Raises:
        StorageArgumentException: if ``date`` is not a datetime, if the origin
            is unknown, or if the (origin, visit) pair is unknown.

    Returns:
        The OriginVisitStatus object that was stored and sent to the journal
        writer.

    """
    # The three guards run in this order so callers get the most specific
    # error message: bad date, then unknown origin, then unknown visit.
    if not isinstance(date, datetime.datetime):
        raise StorageArgumentException("Date must be a datetime")

    if not self.origin_get({"url": origin_url}):
        raise StorageArgumentException(f"Unknown origin {origin_url}")

    if not self.origin_visit_get_by(origin_url, visit_id):
        raise StorageArgumentException(
            f"Unknown origin visit ({origin_url}, {visit_id})"
        )

    key = (origin_url, visit_id)
    assert origin_url in self._origins

    # NOTE(review): presumably convert_validation_exceptions turns model
    # validation errors into storage-level errors; only the model
    # construction is kept inside it here -- confirm against the original
    # layout.
    with convert_validation_exceptions():
        new_status = OriginVisitStatus(
            origin=origin_url,
            visit=visit_id,
            date=date,
            status=status,
            snapshot=snapshot,
            metadata=metadata,
        )

    self._origin_visit_statuses[key].append(new_status)
    self._objects[key].append(("origin_visit", None))
    self.journal_writer.origin_visit_status_add([new_status])

    return new_status
@timed
@db_transaction()
def origin_visit_status_add(
    self,
    origin_url: str,
    visit_id: int,
    date: datetime.datetime,
    status: str,
    metadata: Optional[Dict] = None,
    snapshot: Optional[bytes] = None,
    db=None,
    cur=None,
) -> OriginVisitStatus:
    """Record a new status for an existing visit of an origin.

    Args:
        origin_url: URL of the visited origin
        visit_id: identifier of the visit the status belongs to
        date: date of the status; must be a ``datetime.datetime``
        status: visit status string, e.g. "ongoing" or "full"
        metadata: optional data associated with the visit status
        snapshot: optional identifier of the snapshot to attach to the visit
        db: database handle, forwarded to every lookup and to the insert
            (presumably supplied by the ``db_transaction`` decorator when
            the caller does not pass one -- confirm)
        cur: database cursor, forwarded together with ``db``

    Raises:
        StorageArgumentException: if ``date`` is not a datetime, if the origin
            is unknown, or if the (origin, visit) pair is unknown.

    Returns:
        The OriginVisitStatus object that was stored and sent to the journal
        writer.

    """
    if not isinstance(date, datetime.datetime):
        raise StorageArgumentException("Date must be a datetime")

    # Forward db/cur here exactly as for the visit lookup below, so that the
    # origin check, the visit check and the insert all go through the same
    # database handle and cursor.
    origin = self.origin_get({"url": origin_url}, db=db, cur=cur)
    if not origin:
        raise StorageArgumentException(f"Unknown origin {origin_url}")

    visit = self.origin_visit_get_by(origin_url, visit_id, db=db, cur=cur)
    if not visit:
        raise StorageArgumentException(
            f"Unknown origin visit ({origin_url}, {visit_id})"
        )

    # NOTE(review): presumably convert_validation_exceptions turns model
    # validation errors into storage-level errors; only the model
    # construction is wrapped -- confirm.
    with convert_validation_exceptions():
        visit_status = OriginVisitStatus(
            origin=origin_url,
            visit=visit_id,
            date=date,
            status=status,
            snapshot=snapshot,
            metadata=metadata,
        )

    # Persist first, then notify the journal, so the journal never sees a
    # status whose insert raised.
    self._origin_visit_status_add(visit_status, db, cur)
    self.journal_writer.origin_visit_status_add([visit_status])

    return visit_status
def origin_visit_status_add(self, visits: Iterable[OriginVisitStatus]) -> None:
    """Write origin visit statuses as "origin_visit_status" additions.

    Args:
        visits: the OriginVisitStatus objects to write; they are handed
            unchanged to ``write_additions``.

    """
    object_type = "origin_visit_status"
    self.write_additions(object_type, visits)