diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,3 +1,3 @@ swh.core[db,http] >= 0.0.94 -swh.model >= 0.3 +swh.model >= 0.3.4 swh.objstorage >= 0.0.40 diff --git a/sql/upgrades/155.sql b/sql/upgrades/155.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/155.sql @@ -0,0 +1,35 @@ +-- SWH DB schema upgrade +-- from_version: 154 +-- to_version: 155 +-- description: Drop obsolete origin-visit fields + +-- latest schema version +insert into dbversion(version, release, description) + values(155, now(), 'Work In Progress'); + +alter table origin_visit drop column snapshot; +alter table origin_visit drop column metadata; +alter table origin_visit drop column status; + +create or replace function swh_origin_visit_add(origin_url text, date timestamptz, type text) + returns bigint + language sql +as $$ + with origin_id as ( + select id + from origin + where url = origin_url + ), last_known_visit as ( + select coalesce(max(visit), 0) as visit + from origin_visit + where origin = (select id from origin_id) + ) + insert into origin_visit (origin, date, type, visit) + values ((select id from origin_id), date, type, + (select visit from last_known_visit) + 1) + returning visit; +$$; + +-- dropping the status column above already removed the (type, status, date) index implicitly; "if exists" keeps this statement from aborting the upgrade +drop index if exists origin_visit_type_status_date_idx; +create index concurrently on origin_visit(type, date); diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py --- a/swh/storage/backfill.py +++ b/swh/storage/backfill.py @@ -103,15 +103,7 @@ ], "snapshot": ["id", "object_id"], "origin": ["url"], - "origin_visit": [ - "visit", - "type", - ("origin.url", "origin"), - "date", - "snapshot", - "status", - "metadata", - ], + "origin_visit": ["visit", "type", ("origin.url", "origin"), "date",], "origin_visit_status": [ "visit", ("origin.url", "origin"), diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py --- a/swh/storage/cassandra/cql.py +++ b/swh/storage/cassandra/cql.py @@ -655,9 +655,6 @@ "visit", "type", 
"date", - "status", - "metadata", - "snapshot", ] @_prepared_statement("SELECT * FROM origin_visit WHERE origin = ? AND visit > ?") diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py --- a/swh/storage/cassandra/schema.py +++ b/swh/storage/cassandra/schema.py @@ -148,9 +148,6 @@ visit bigint, date timestamp, type text, - status ascii, - metadata text, - snapshot blob, PRIMARY KEY ((origin), visit) ); diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -809,11 +809,15 @@ self._cql_runner.origin_visit_add_one(visit) assert visit.visit is not None all_visits.append(visit) - - visit_status_dict = visit.to_dict() - visit_status_dict.pop("type") - visit_status = OriginVisitStatus.from_dict(visit_status_dict) - self._origin_visit_status_add(visit_status) + self._origin_visit_status_add( + OriginVisitStatus( + origin=visit.origin, + visit=visit.visit, + date=visit.date, + status="created", + snapshot=None, + ) + ) return all_visits @@ -881,7 +885,6 @@ **visit._asdict(), "origin": visit.origin, "date": visit.date.replace(tzinfo=datetime.timezone.utc), - "metadata": (json.loads(visit.metadata) if visit.metadata else None), } def origin_visit_get( diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -428,8 +428,7 @@ revision_get_cols = revision_add_cols + ["parents"] def origin_visit_add(self, origin, ts, type, cur=None): - """Add a new origin_visit for origin origin at timestamp ts with - status 'ongoing'. + """Add a new origin_visit for origin origin at timestamp ts. 
Args: origin: origin concerned by the visit @@ -477,6 +476,13 @@ + [jsonize(visit_status.metadata)], ) + origin_visit_upsert_cols = [ + "origin", + "visit", + "date", + "type", + ] + def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None: # doing an extra query like this is way simpler than trying to join # the origin id in the query below @@ -487,23 +493,14 @@ query = """INSERT INTO origin_visit ({cols}) VALUES ({values}) ON CONFLICT ON CONSTRAINT origin_visit_pkey DO UPDATE SET {updates}""".format( - cols=", ".join(self.origin_visit_get_cols), - values=", ".join("%s" for col in self.origin_visit_get_cols), + cols=", ".join(self.origin_visit_upsert_cols), + values=", ".join("%s" for col in self.origin_visit_upsert_cols), updates=", ".join( - "{0}=excluded.{0}".format(col) for col in self.origin_visit_get_cols + "{0}=excluded.{0}".format(col) for col in self.origin_visit_upsert_cols ), ) cur.execute( - query, - ( - origin_id, - ov.visit, - ov.date, - ov.type, - ov.status, - ov.metadata, - ov.snapshot, - ), + query, (origin_id, ov.visit, ov.date, ov.type), ) origin_visit_get_cols = [ diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -798,10 +798,6 @@ while len(self._origin_visits[origin_url]) < visit.visit: self._origin_visits[origin_url].append(None) self._origin_visits[origin_url][visit.visit - 1] = visit - visit_status_dict = visit.to_dict() - visit_status_dict.pop("type") - visit_status = OriginVisitStatus.from_dict(visit_status_dict) - self._origin_visit_status_add_one(visit_status) else: # visit ids are in the range [1, +inf[ visit_id = len(self._origin_visits[origin_url]) + 1 @@ -809,13 +805,17 @@ self.journal_writer.origin_visit_add([visit]) self._origin_visits[origin_url].append(visit) visit_key = (origin_url, visit.visit) - - visit_status_dict = visit.to_dict() - visit_status_dict.pop("type") - visit_status = OriginVisitStatus.from_dict(visit_status_dict) - 
self._origin_visit_status_add_one(visit_status) self._objects[visit_key].append(("origin_visit", None)) assert visit.visit is not None + self._origin_visit_status_add_one( + OriginVisitStatus( + origin=visit.origin, + visit=visit.visit, + date=visit.date, + status="created", + snapshot=None, + ) + ) all_visits.append(visit) return all_visits diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql --- a/swh/storage/sql/30-swh-schema.sql +++ b/swh/storage/sql/30-swh-schema.sql @@ -17,7 +17,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(154, now(), 'Work In Progress'); + values(155, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -280,20 +280,13 @@ origin bigint not null, visit bigint not null, date timestamptz not null, - type text not null, - -- remove those when done migrating the schema - status origin_visit_state not null, - metadata jsonb, - snapshot sha1_git + type text not null ); comment on column origin_visit.origin is 'Visited origin'; comment on column origin_visit.visit is 'Sequential visit number for the origin'; comment on column origin_visit.date is 'Visit timestamp'; comment on column origin_visit.type is 'Type of loader that did the visit (hg, git, ...)'; -comment on column origin_visit.status is '(Deprecated) Visit status'; -comment on column origin_visit.metadata is '(Deprecated) Optional origin visit metadata'; -comment on column origin_visit.snapshot is '(Deprecated) Optional snapshot of the origin visit. It can be partial.'; -- Crawling history of software origin visits by Software Heritage. 
Each diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql --- a/swh/storage/sql/40-swh-func.sql +++ b/swh/storage/sql/40-swh-func.sql @@ -592,9 +592,9 @@ from origin_visit where origin = (select id from origin_id) ) - insert into origin_visit (origin, date, type, visit, status) + insert into origin_visit (origin, date, type, visit) values ((select id from origin_id), date, type, - (select visit from last_known_visit) + 1, 'ongoing') + (select visit from last_known_visit) + 1) returning visit; $$; diff --git a/swh/storage/sql/60-swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql --- a/swh/storage/sql/60-swh-indexes.sql +++ b/swh/storage/sql/60-swh-indexes.sql @@ -126,7 +126,7 @@ alter table origin_visit add primary key using index origin_visit_pkey; create index concurrently on origin_visit(date); -create index concurrently on origin_visit(type, status, date); +create index concurrently on origin_visit(type, date); alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid; alter table origin_visit validate constraint origin_visit_origin_fkey; diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -823,9 +823,13 @@ all_visits.append(visit) # Forced to write after for the case when the visit has no id self.journal_writer.origin_visit_add([visit]) - visit_status_dict = visit.to_dict() - visit_status_dict.pop("type") - visit_status = OriginVisitStatus.from_dict(visit_status_dict) + visit_status = OriginVisitStatus( + origin=visit.origin, + visit=visit.visit, + date=visit.date, + status="created", + snapshot=None, + ) self._origin_visit_status_add(visit_status, db=db, cur=cur) send_metric("origin_visit:add", count=nb_visits, method_name="origin_visit") diff --git a/swh/storage/tests/test_backfill.py b/swh/storage/tests/test_backfill.py --- a/swh/storage/tests/test_backfill.py +++ b/swh/storage/tests/test_backfill.py @@ 
-118,15 +118,12 @@ "type", "origin", "date", - "snapshot", - "status", - "metadata", ] assert ( query == """ -select visit,type,origin.url as origin,date,snapshot,status,metadata +select visit,type,origin.url as origin,date from origin_visit left join origin on origin_visit.origin=origin.id where (origin_visit.origin) >= %s and (origin_visit.origin) < %s diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -3,7 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import attr import copy from contextlib import contextmanager import datetime @@ -1445,15 +1444,7 @@ origin_url = origin["url"] for date_visit in visits: visit = swh_storage.origin_visit_add( - [ - OriginVisit( - origin=origin_url, - date=date_visit, - type=visit_type, - status="ongoing", - snapshot=None, - ) - ] + [OriginVisit(origin=origin_url, date=date_visit, type=visit_type,)] )[0] swh_storage.origin_visit_status_add( [ @@ -1489,15 +1480,7 @@ origin_url = origin["url"] for date_visit in visits: visit = swh_storage.origin_visit_add( - [ - OriginVisit( - origin=origin_url, - date=date_visit, - type=visit_type, - status="ongoing", - snapshot=None, - ) - ] + [OriginVisit(origin=origin_url, date=date_visit, type=visit_type,)] )[0] swh_storage.origin_visit_status_add( [ @@ -1660,18 +1643,10 @@ date_visit2 = round_to_milliseconds(date_visit2) visit1 = OriginVisit( - origin=origin1.url, - date=date_visit, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin1.url, date=date_visit, type=data.type_visit1, ) visit2 = OriginVisit( - origin=origin1.url, - date=date_visit2, - type=data.type_visit2, - status="partial", - snapshot=None, + origin=origin1.url, date=date_visit2, type=data.type_visit2, ) # add once @@ -1681,38 +1656,44 @@ assert ov1 == origin_visit1 assert ov2 == origin_visit2 + ovs1 = 
OriginVisitStatus( + origin=origin1.url, + visit=ov1.visit, + date=date_visit, + status="created", + snapshot=None, + ) + ovs2 = OriginVisitStatus( + origin=origin1.url, + visit=ov2.visit, + date=date_visit2, + status="created", + snapshot=None, + ) + actual_origin_visits = list(swh_storage.origin_visit_get(origin1.url)) expected_visits = [ - attr.evolve(visit1, visit=ov1.visit), - attr.evolve(visit2, visit=ov2.visit), + {**ovs1.to_dict(), "type": ov1.type}, + {**ovs2.to_dict(), "type": ov2.type}, ] - expected_visit_statuses = [] - for visit in expected_visits: - visit_status = visit.to_dict() - visit_status.pop("type") - expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status)) - assert len(expected_visits) == len(actual_origin_visits) + for visit in expected_visits: - assert visit.to_dict() in actual_origin_visits + assert visit in actual_origin_visits actual_objects = set(swh_storage.journal_writer.journal.objects) # we write to the journal as many times as we call the endpoint assert actual_objects == set( [("origin", origin1)] - + [("origin_visit", visit) for visit in expected_visits] * 2 - + [("origin_visit_status", ovs) for ovs in expected_visit_statuses] * 2 + + [("origin_visit", visit) for visit in [ov1, ov2]] * 2 + + [("origin_visit_status", ovs) for ovs in [ovs1, ovs2]] ) def test_origin_visit_add_validation(self, swh_storage): """Unknown origin when adding visits should raise""" visit = OriginVisit( - origin="something-unknown", - date=now(), - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin="something-unknown", date=now(), type=data.type_visit1, ) with pytest.raises(StorageArgumentException, match="Unknown origin"): swh_storage.origin_visit_add([visit]) @@ -1747,22 +1728,29 @@ ov1, ov2 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin1.url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin1.url, date=data.date_visit1, type=data.type_visit1, ), 
OriginVisit( - origin=origin2.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin2.url, date=data.date_visit2, type=data.type_visit2, ), ] ) + ovs1 = OriginVisitStatus( + origin=origin1.url, + visit=ov1.visit, + date=data.date_visit1, + status="created", + snapshot=None, + ) + ovs2 = OriginVisitStatus( + origin=origin2.url, + visit=ov2.visit, + date=data.date_visit2, + status="created", + snapshot=None, + ) + snapshot_id = data.snapshot["id"] date_visit_now = now() visit_status1 = OriginVisitStatus( @@ -1804,21 +1792,14 @@ expected_origins = [origin1, origin2] expected_visits = [ov1, ov2] - expected_visit_statuses = [] - for visit in expected_visits: # out of origin-visit-add calls - visit_status = visit.to_dict() - visit_status.pop("type") - expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status)) - - # out of origin-visit-status add calls - expected_visit_statuses += [visit_status1, visit_status2] + expected_visit_statuses = [ovs1, ovs2, visit_status1, visit_status2] + expected_objects = ( [("origin", o) for o in expected_origins] + [("origin_visit", v) for v in expected_visits] + [("origin_visit_status", ovs) for ovs in expected_visit_statuses] ) - assert len(actual_objects) == len(expected_objects) for obj in expected_objects: assert obj in actual_objects @@ -1831,15 +1812,18 @@ ov1 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin1.url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin1.url, date=data.date_visit1, type=data.type_visit1, ), ] )[0] + ovs1 = OriginVisitStatus( + origin=origin1.url, + visit=ov1.visit, + date=data.date_visit1, + status="created", + snapshot=None, + ) snapshot_id = data.snapshot["id"] date_visit_now = now() visit_status1 = OriginVisitStatus( @@ -1866,57 +1850,62 @@ expected_origins = [origin1] expected_visits = [ov1] - expected_visit_statuses = [] - for visit in expected_visits: # out of 
origin-visit-add calls - visit_status = visit.to_dict() - visit_status.pop("type") - expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status)) + expected_visit_statuses = [ovs1, visit_status1, visit_status1] # write twice in the journal - expected_visit_statuses += [visit_status1] * 2 expected_objects = ( [("origin", o) for o in expected_origins] + [("origin_visit", v) for v in expected_visits] + [("origin_visit_status", ovs) for ovs in expected_visit_statuses] ) - assert len(actual_objects) == len(expected_objects) for obj in expected_objects: assert obj in actual_objects def test_origin_visit_find_by_date(self, swh_storage): # given - origin_url = swh_storage.origin_add_one(data.origin) + origin = Origin.from_dict(data.origin) + swh_storage.origin_add_one(data.origin) visit1 = OriginVisit( - origin=origin_url, + origin=origin.url, date=data.date_visit2, type=data.type_visit1, + ) + visit2 = OriginVisit( + origin=origin.url, date=data.date_visit3, type=data.type_visit2, + ) + visit3 = OriginVisit( + origin=origin.url, date=data.date_visit2, type=data.type_visit3, + ) + ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) + + ovs1 = OriginVisitStatus( + origin=origin.url, + visit=ov1.visit, date=data.date_visit2, - type=data.type_visit1, status="ongoing", snapshot=None, ) - visit2 = OriginVisit( - origin=origin_url, + ovs2 = OriginVisitStatus( + origin=origin.url, + visit=ov2.visit, date=data.date_visit3, - type=data.type_visit2, status="ongoing", snapshot=None, ) - visit3 = OriginVisit( - origin=origin_url, + ovs3 = OriginVisitStatus( + origin=origin.url, + visit=ov3.visit, date=data.date_visit2, - type=data.type_visit3, status="ongoing", snapshot=None, ) - - _, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) + swh_storage.origin_visit_status_add([ovs1, ovs2, ovs3]) # Simple case - visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit3) + visit = 
swh_storage.origin_visit_find_by_date(origin.url, data.date_visit3) assert visit["visit"] == ov2.visit # There are two visits at the same date, the latest must be returned - visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit2) + visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit2) assert visit["visit"] == ov3.visit def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): @@ -1926,11 +1915,7 @@ origin_url = swh_storage.origin_add_one(data.origin) origin_url2 = swh_storage.origin_add_one(data.origin2) visit = OriginVisit( - origin=origin_url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin_url, date=data.date_visit2, type=data.type_visit2, ) origin_visit1 = swh_storage.origin_visit_add([visit])[0] @@ -2031,31 +2016,20 @@ origin = Origin.from_dict(data.origin) swh_storage.origin_add_one(origin) visit1 = OriginVisit( - origin=origin.url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin.url, date=data.date_visit1, type=data.type_visit1, ) visit2 = OriginVisit( - origin=origin.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin.url, date=data.date_visit2, type=data.type_visit2, ) # Add a visit with the same date as the previous one visit3 = OriginVisit( - origin=origin.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin.url, date=data.date_visit2, type=data.type_visit2, ) assert data.type_visit1 != data.type_visit2 assert data.date_visit1 < data.date_visit2 ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) + origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit) origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit) @@ -2086,26 +2060,14 @@ origin = Origin.from_dict(data.origin) swh_storage.origin_add_one(origin) visit1 = OriginVisit( - 
origin=origin.url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin.url, date=data.date_visit1, type=data.type_visit1, ) visit2 = OriginVisit( - origin=origin.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin.url, date=data.date_visit2, type=data.type_visit2, ) # Add a visit with the same date as the previous one visit3 = OriginVisit( - origin=origin.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin.url, date=data.date_visit2, type=data.type_visit2, ) ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) @@ -2135,10 +2097,14 @@ ) ] ) - assert { + actual_visit = swh_storage.origin_visit_get_latest( + origin.url, require_snapshot=True + ) + assert actual_visit == { **origin_visit1, "snapshot": complete_snapshot.id, - } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True) + "status": "ongoing", # visit1 has status created now + } assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) @@ -2188,6 +2154,7 @@ assert { **origin_visit2, "snapshot": empty_snapshot.id, + "status": "ongoing", } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True) assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) @@ -2226,11 +2193,13 @@ assert { **origin_visit3, "snapshot": complete_snapshot.id, + "status": "ongoing", } == swh_storage.origin_visit_get_latest(origin.url) assert { **origin_visit3, "snapshot": complete_snapshot.id, + "status": "ongoing", } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True) def test_origin_visit_status_get_latest(self, swh_storage): @@ -2242,18 +2211,10 @@ ov1, ov2 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin1.url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin1.url, date=data.date_visit1, 
type=data.type_visit1, ), OriginVisit( - origin=origin1.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin1.url, date=data.date_visit2, type=data.type_visit2, ), ] ) @@ -2283,7 +2244,7 @@ ovs3 = OriginVisitStatus( origin=origin1.url, visit=ov2.visit, - date=data.date_visit2, + date=data.date_visit2 + datetime.timedelta(minutes=1), # to not be ignored status="ongoing", snapshot=None, ) @@ -2374,11 +2335,7 @@ ov1 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin_url, date=data.date_visit1, type=data.type_visit1, ) ] )[0] @@ -2406,33 +2363,37 @@ by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit) assert by_ov == {**data.empty_snapshot, "next_branch": None} - data1 = { - "origin": origin_url, - "date": data.date_visit1, - "visit": ov1.visit, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data2 = { - "origin": origin_url, - "date": date_now, - "visit": ov1.visit, - "status": "full", - "metadata": None, - "snapshot": data.empty_snapshot["id"], - } + ovs1 = OriginVisitStatus.from_dict( + { + "origin": origin_url, + "date": data.date_visit1, + "visit": ov1.visit, + "status": "created", + "snapshot": None, + "metadata": None, + } + ) + ovs2 = OriginVisitStatus.from_dict( + { + "origin": origin_url, + "date": date_now, + "visit": ov1.visit, + "status": "full", + "metadata": None, + "snapshot": data.empty_snapshot["id"], + } + ) actual_objects = list(swh_storage.journal_writer.journal.objects) - assert actual_objects == [ + + expected_objects = [ ("origin", Origin.from_dict(data.origin)), - ( - "origin_visit", - OriginVisit.from_dict({**data1, "type": data.type_visit1},), - ), - ("origin_visit_status", OriginVisitStatus.from_dict(data1)), + ("origin_visit", ov1), + ("origin_visit_status", ovs1,), ("snapshot", Snapshot.from_dict(data.empty_snapshot)), - 
("origin_visit_status", OriginVisitStatus.from_dict(data2),), + ("origin_visit_status", ovs2,), ] + for obj in expected_objects: + assert obj in actual_objects def test_snapshot_add_get_complete(self, swh_storage): origin_url = data.origin["url"] @@ -2610,11 +2571,7 @@ def test_snapshot_add_get_filtered(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) visit = OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin_url, date=data.date_visit1, type=data.type_visit1, ) origin_visit1 = swh_storage.origin_visit_add([visit])[0] @@ -2734,11 +2691,7 @@ def test_snapshot_add_get(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) visit = OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin_url, date=data.date_visit1, type=data.type_visit1, ) origin_visit1 = swh_storage.origin_visit_add([visit])[0] visit_id = origin_visit1.visit @@ -2770,11 +2723,7 @@ ov1 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, + origin=origin_url, date=data.date_visit1, type=data.type_visit1, ) ] )[0] @@ -2799,11 +2748,7 @@ ov2 = swh_storage.origin_visit_add( [ OriginVisit( - origin=origin_url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + origin=origin_url, date=data.date_visit2, type=data.type_visit2, ) ] )[0] @@ -2825,55 +2770,59 @@ by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov2.visit) assert by_ov2 == {**data.snapshot, "next_branch": None} - data1 = { - "origin": origin_url, - "date": data.date_visit1, - "visit": ov1.visit, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data2 = { - "origin": origin_url, - "date": date_now2, - "visit": ov1.visit, - "status": "ongoing", - "metadata": None, - "snapshot": 
data.snapshot["id"], - } - data3 = { - "origin": origin_url, - "date": data.date_visit2, - "visit": ov2.visit, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data4 = { - "origin": origin_url, - "date": date_now4, - "visit": ov2.visit, - "status": "ongoing", - "metadata": None, - "snapshot": data.snapshot["id"], - } + ovs1 = OriginVisitStatus.from_dict( + { + "origin": origin_url, + "date": data.date_visit1, + "visit": ov1.visit, + "status": "created", + "metadata": None, + "snapshot": None, + } + ) + ovs2 = OriginVisitStatus.from_dict( + { + "origin": origin_url, + "date": date_now2, + "visit": ov1.visit, + "status": "ongoing", + "metadata": None, + "snapshot": data.snapshot["id"], + } + ) + ovs3 = OriginVisitStatus.from_dict( + { + "origin": origin_url, + "date": data.date_visit2, + "visit": ov2.visit, + "status": "created", + "metadata": None, + "snapshot": None, + } + ) + ovs4 = OriginVisitStatus.from_dict( + { + "origin": origin_url, + "date": date_now4, + "visit": ov2.visit, + "status": "ongoing", + "metadata": None, + "snapshot": data.snapshot["id"], + } + ) actual_objects = list(swh_storage.journal_writer.journal.objects) - assert actual_objects == [ + expected_objects = [ ("origin", Origin.from_dict(data.origin)), - ( - "origin_visit", - OriginVisit.from_dict({**data1, "type": data.type_visit1}), - ), - ("origin_visit_status", OriginVisitStatus.from_dict(data1)), + ("origin_visit", ov1), + ("origin_visit_status", ovs1), ("snapshot", Snapshot.from_dict(data.snapshot)), - ("origin_visit_status", OriginVisitStatus.from_dict(data2),), - ( - "origin_visit", - OriginVisit.from_dict({**data3, "type": data.type_visit2}), - ), - ("origin_visit_status", OriginVisitStatus.from_dict(data3)), - ("origin_visit_status", OriginVisitStatus.from_dict(data4),), + ("origin_visit_status", ovs2), + ("origin_visit", ov2), + ("origin_visit_status", ovs3), + ("origin_visit_status", ovs4), ] + for obj in expected_objects: + assert obj in actual_objects def 
test_snapshot_get_random(self, swh_storage): swh_storage.snapshot_add( @@ -3649,9 +3598,7 @@ swh_storage.origin_add([{"url": url} for url in self.ORIGINS]) origin_url = "https://github.com/user1/repo1" - visit = OriginVisit( - origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None - ) + visit = OriginVisit(origin=origin_url, date=now(), type="git",) swh_storage.origin_visit_add([visit]) assert swh_storage.origin_count("github", with_visit=False) == 3 @@ -3676,9 +3623,7 @@ swh_storage.snapshot_add([data.snapshot]) origin_url = "https://github.com/user1/repo1" - visit = OriginVisit( - origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None - ) + visit = OriginVisit(origin=origin_url, date=now(), type="git",) visit = swh_storage.origin_visit_add([visit])[0] swh_storage.origin_visit_status_add( [ @@ -3716,11 +3661,7 @@ if "visit" in obj: del obj["visit"] visit = OriginVisit( - origin=origin_url, - date=obj["date"], - type=obj["type"], - status="ongoing", - snapshot=None, + origin=origin_url, date=obj["date"], type=obj["type"], ) swh_storage.origin_visit_add([visit]) else: