diff --git a/requirements-swh-journal.txt b/requirements-swh-journal.txt --- a/requirements-swh-journal.txt +++ b/requirements-swh-journal.txt @@ -1 +1 @@ -swh.journal >= 0.5.1 +swh.journal >= 0.6.2 diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,3 +1,3 @@ swh.core[db,http] >= 0.5 -swh.model >= 0.7.2 +swh.model >= 0.10.0 swh.objstorage >= 0.2.2 diff --git a/sql/upgrades/165.sql b/sql/upgrades/165.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/165.sql @@ -0,0 +1,12 @@ +-- SWH DB schema upgrade +-- from_version: 164 +-- to_version: 165 +-- description: add type to origin_visit_status + +insert into dbversion(version, release, description) + values(165, now(), 'Work In Progress'); + +-- Adapt the origin_visit_status table for the new type column +alter table origin_visit_status add column type text; + +comment on column origin_visit_status.type is 'Type of loader that did the visit (hg, git, ...)'; diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py --- a/swh/storage/cassandra/model.py +++ b/swh/storage/cassandra/model.py @@ -196,14 +196,13 @@ origin: str visit: int date: datetime.datetime + type: str status: str metadata: str snapshot: bytes @classmethod def from_dict(cls: Type[T], d: Dict[str, Any]) -> T: - d = d.copy() - d.pop("type", None) return cls(**d) # type: ignore diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py --- a/swh/storage/cassandra/schema.py +++ b/swh/storage/cassandra/schema.py @@ -157,6 +157,7 @@ origin text, visit bigint, date timestamp, + type text, status ascii, metadata text, snapshot blob, diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -888,6 +888,7 @@ origin=visit.origin, visit=visit.visit, date=visit.date, + type=visit.type, status="created", snapshot=None, ) @@ -897,6 +898,14 @@ def _origin_visit_status_add(self, visit_status: OriginVisitStatus) -> None: """Add an origin visit status""" + if visit_status.type is None: + origin_row = self._cql_runner.origin_visit_get_one( + visit_status.origin, visit_status.visit + ) + if origin_row is None: + raise StorageArgumentException(f"Unknown origin {visit_status.origin}") + visit_status = attr.evolve(visit_status, type=origin_row.type) + self.journal_writer.origin_visit_status_add([visit_status]) self._cql_runner.origin_visit_status_add_one( converters.visit_status_to_row(visit_status) @@ -929,6 +938,9 @@ "origin": visit["origin"], # but keep the date of the creation of the origin visit "date": visit["date"], + # We use the visit type from origin visit + # if it's not present on the origin visit status + "type": visit_status.type or visit["type"], } def _origin_visit_get_latest_status(self, visit: OriginVisit) -> OriginVisitStatus: diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py --- a/swh/storage/postgresql/db.py +++ b/swh/storage/postgresql/db.py @@ -447,6 +447,7 @@ "origin", "visit", "date", + "type", "status", "snapshot", "metadata", @@ -514,6 +515,7 @@ "o.url AS origin", "ovs.visit", "ovs.date", + "ovs.type", "ovs.status", "ovs.snapshot", "ovs.metadata", diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -844,6 +844,7 @@ origin=visit.origin, visit=visit.visit, date=visit.date, + type=visit.type, status="created", snapshot=None, ) @@ -867,13 +868,27 @@ def origin_visit_status_add( self, visit_statuses: List[OriginVisitStatus], db=None, cur=None, ) -> None: + visit_statuses_ = [] + # First round to check existence (fail early if any is ko) for visit_status in visit_statuses: origin_url = self.origin_get([visit_status.origin], db=db, cur=cur)[0] if not origin_url: raise StorageArgumentException(f"Unknown origin {visit_status.origin}") - for visit_status in visit_statuses: + if visit_status.type is None: + origin_visit = self.origin_visit_get_by( + visit_status.origin, visit_status.visit, db=db, cur=cur + ) + assert origin_visit is not None + + origin_visit_status = attr.evolve(visit_status, type=origin_visit.type) + else: + origin_visit_status = visit_status + + visit_statuses_.append(origin_visit_status) + + for visit_status in visit_statuses_: self._origin_visit_status_add(visit_status, db, cur) @timed @@ -1036,6 +1051,7 @@ origin=row_d["origin"], visit=row_d["visit"], date=row_d["date"], + type=row_d["type"], status=row_d["status"], snapshot=row_d["snapshot"], metadata=row_d["metadata"], diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -301,6 +301,7 @@ origin bigint not null, visit bigint not null, date timestamptz not null, + type text, status origin_visit_state not null, metadata jsonb, snapshot sha1_git @@ -309,6 +310,7 @@ comment on column origin_visit_status.origin is 'Origin concerned by the visit update'; comment on column origin_visit_status.visit is 'Visit concerned by the visit update'; comment on column origin_visit_status.date is 'Visit update timestamp'; +comment on column origin_visit_status.type is 'Type of loader that did the visit (hg, git, ...)'; comment on column origin_visit_status.status is 'Visit status (ongoing, failed, full)'; comment on column origin_visit_status.metadata is 'Optional origin visit metadata'; comment on column origin_visit_status.snapshot is 'Optional, possibly partial, snapshot of the origin visit. It can be partial.'; diff --git a/swh/storage/tests/algos/test_origin.py b/swh/storage/tests/algos/test_origin.py --- a/swh/storage/tests/algos/test_origin.py +++ b/swh/storage/tests/algos/test_origin.py @@ -89,33 +89,37 @@ # origin visit status 1 for origin visit 1 ovs11 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=ov1.date + datetime.timedelta(seconds=10), # so it's not ignored + type=ov1.type, status="partial", snapshot=None, ) # origin visit status 2 for origin visit 1 ovs12 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=sample_data.date_visit2, + type=ov1.type, status="ongoing", snapshot=None, ) # origin visit status 1 for origin visit 2 ovs21 = OriginVisitStatus( - origin=origin2.url, + origin=ov2.origin, visit=ov2.visit, date=ov2.date + datetime.timedelta(seconds=10), # so it's not ignored + type=ov2.type, status="ongoing", snapshot=None, ) # origin visit status 2 for origin visit 2 ovs22 = OriginVisitStatus( - origin=origin2.url, + origin=ov2.origin, visit=ov2.visit, date=date_now, + type=ov2.type, status="full", snapshot=snapshot.id, metadata={"something": "wicked"}, @@ -309,9 +313,10 @@ date_past = now() - datetime.timedelta(weeks=20) ovs1 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=ov1.date, + type=ov1.type, status="created", snapshot=None, ) @@ -321,9 +326,10 @@ new_visit_statuses.append( OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=status_date, + type=ov1.type, status="created", snapshot=None, ) diff --git a/swh/storage/tests/algos/test_snapshot.py b/swh/storage/tests/algos/test_snapshot.py --- a/swh/storage/tests/algos/test_snapshot.py +++ b/swh/storage/tests/algos/test_snapshot.py @@ -129,9 +129,10 @@ swh_storage.origin_visit_status_add( [ OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_now, + type=ov1.type, status="full", snapshot=complete_snapshot.id, ) @@ -174,16 +175,18 @@ # Add complete_snapshot to visit1 which targets revision1 ovs1, ovs2 = [ OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_visit2, + type=ov1.type, status="partial", snapshot=complete_snapshot.id, ), OriginVisitStatus( - origin=origin.url, + origin=ov2.origin, visit=ov2.visit, date=now(), + type=ov2.type, status="full", snapshot=empty_snapshot.id, ), @@ -232,16 +235,18 @@ # Add complete_snapshot to visit1 which targets revision1 ovs1, ovs2 = [ OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_visit2, + type=ov1.type, status="partial", snapshot=complete_snapshot.id, ), OriginVisitStatus( - origin=origin.url, + origin=ov2.origin, visit=ov2.visit, date=now(), + type=ov2.type, status="full", snapshot=empty_snapshot.id, ), diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -1426,25 +1426,28 @@ )[0] ovs1 = OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_visit1, + type=ov1.type, status="created", snapshot=None, ) ovs2 = OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_visit2, + type=ov1.type, status="partial", snapshot=None, ) ovs3 = OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_visit3, + type=ov1.type, status="full", snapshot=sample_data.snapshot.id, metadata={}, @@ -1770,16 +1773,18 @@ assert ov2 == origin_visit2 ovs1 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=date_visit, + type=ov1.type, status="created", snapshot=None, ) ovs2 = OriginVisitStatus( - origin=origin1.url, + origin=ov2.origin, visit=ov2.visit, date=date_visit2, + type=ov2.type, status="created", snapshot=None, ) @@ -1850,16 +1855,18 @@ ) ovs1 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=sample_data.date_visit1, + type=ov1.type, status="created", snapshot=None, ) ovs2 = OriginVisitStatus( - origin=origin2.url, + origin=ov2.origin, visit=ov2.visit, date=sample_data.date_visit2, + type=ov2.type, status="created", snapshot=None, ) @@ -1869,6 +1876,7 @@ origin=ov1.origin, visit=ov1.visit, date=date_visit_now, + type=ov1.type, status="full", snapshot=snapshot.id, ) @@ -1878,6 +1886,7 @@ origin=ov2.origin, visit=ov2.visit, date=date_visit_now, + type=ov2.type, status="ongoing", snapshot=None, metadata={"intrinsic": "something"}, @@ -1930,9 +1939,10 @@ )[0] ovs1 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=sample_data.date_visit1, + type=ov1.type, status="created", snapshot=None, ) @@ -1941,6 +1951,7 @@ origin=ov1.origin, visit=ov1.visit, date=date_visit_now, + type=ov1.type, status="full", snapshot=snapshot.id, ) @@ -2186,9 +2197,10 @@ # Add snapshot to visit1; require_snapshot=True makes it return first visit swh_storage.snapshot_add([complete_snapshot]) visit_status_with_snapshot = OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=round_to_milliseconds(now()), + type=ov1.type, status="ongoing", snapshot=complete_snapshot.id, ) @@ -2227,9 +2239,10 @@ assert actual_visit is None visit_status1_full = OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=round_to_milliseconds(now()), + type=ov1.type, status="full", snapshot=complete_snapshot.id, ) @@ -2255,9 +2268,10 @@ swh_storage.snapshot_add([empty_snapshot]) visit_status2_full = OriginVisitStatus( - origin=origin.url, + origin=ov2.origin, visit=ov2.visit, date=round_to_milliseconds(now()), + type=ov2.type, status="ongoing", snapshot=empty_snapshot.id, ) @@ -2284,9 +2298,10 @@ # Add snapshot to visit3 (same date as visit2) visit_status3_with_snapshot = OriginVisitStatus( - origin=origin.url, + origin=ov3.origin, visit=ov3.visit, date=round_to_milliseconds(now()), + type=ov3.type, status="ongoing", snapshot=complete_snapshot.id, ) @@ -2434,31 +2449,35 @@ assert sample_data.date_visit2 < date_now ovs1 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=sample_data.date_visit1, + type=ov1.type, status="partial", snapshot=None, ) ovs2 = OriginVisitStatus( - origin=origin1.url, + origin=ov1.origin, visit=ov1.visit, date=sample_data.date_visit2, + type=ov1.type, status="ongoing", snapshot=None, ) ovs3 = OriginVisitStatus( - origin=origin1.url, + origin=ov2.origin, visit=ov2.visit, date=sample_data.date_visit2 + datetime.timedelta(minutes=1), # to not be ignored + type=ov2.type, status="ongoing", snapshot=None, ) ovs4 = OriginVisitStatus( - origin=origin1.url, + origin=ov2.origin, visit=ov2.visit, date=date_now, + type=ov2.type, status="full", snapshot=snapshot.id, metadata={"something": "wicked"}, @@ -2559,9 +2578,10 @@ swh_storage.origin_visit_status_add( [ OriginVisitStatus( - origin=origin.url, + origin=ov1.origin, visit=ov1.visit, date=date_now, + type=ov1.type, status="full", snapshot=empty_snapshot.id, ) @@ -2573,8 +2593,9 @@ ovs1 = OriginVisitStatus.from_dict( { - "origin": origin.url, + "origin": ov1.origin, "date": sample_data.date_visit1, + "type": ov1.type, "visit": ov1.visit, "status": "created", "snapshot": None, @@ -2583,8 +2604,9 @@ ) ovs2 = OriginVisitStatus.from_dict( { - "origin": origin.url, + "origin": ov1.origin, "date": date_now, + "type": ov1.type, "visit": ov1.visit, "status": "full", "metadata": None,