Page MenuHomeSoftware Heritage

D3342.id11861.diff
No OneTemporary

D3342.id11861.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,3 @@
swh.core[db,http] >= 0.0.94
-swh.model >= 0.3
+swh.model >= 0.3.4
swh.objstorage >= 0.0.40
diff --git a/sql/upgrades/155.sql b/sql/upgrades/155.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/155.sql
@@ -0,0 +1,44 @@
+-- SWH DB schema upgrade
+-- from_version: 154
+-- to_version: 155
+-- description: Drop obsolete origin-visit fields
+
+-- latest schema version
+insert into dbversion(version, release, description)
+ values(155, now(), 'Work In Progress');
+
+-- Drop the old (type, status, date) index explicitly, before the columns go
+-- away: PostgreSQL drops indexes involving a dropped column automatically, so
+-- a plain "drop index" issued after "drop column status" would fail.
+drop index if exists origin_visit_type_status_date_idx;
+
+-- Deprecated columns; origin_visit only keeps origin, visit, date and type.
+alter table origin_visit drop column snapshot;
+alter table origin_visit drop column metadata;
+alter table origin_visit drop column status;
+
+-- Insert a new visit for the origin whose url is origin_url and return its
+-- visit number: the highest visit number already recorded for that origin
+-- plus one (1 when there is none). The insert no longer sets a status.
+create or replace function swh_origin_visit_add(origin_url text, date timestamptz, type text)
+ returns bigint
+ language sql
+as $$
+ with origin_id as (
+ select id
+ from origin
+ where url = origin_url
+ ), last_known_visit as (
+ select coalesce(max(visit), 0) as visit
+ from origin_visit
+ where origin = (select id from origin_id)
+ )
+ insert into origin_visit (origin, date, type, visit)
+ values ((select id from origin_id), date, type,
+ (select visit from last_known_visit) + 1)
+ returning visit;
+$$;
+
+-- Replacement for the dropped (type, status, date) index.
+-- NOTE: "create index concurrently" cannot run inside a transaction block.
+create index concurrently on origin_visit(type, date);
diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -103,15 +103,7 @@
],
"snapshot": ["id", "object_id"],
"origin": ["url"],
- "origin_visit": [
- "visit",
- "type",
- ("origin.url", "origin"),
- "date",
- "snapshot",
- "status",
- "metadata",
- ],
+ "origin_visit": ["visit", "type", ("origin.url", "origin"), "date",],
"origin_visit_status": [
"visit",
("origin.url", "origin"),
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -655,9 +655,6 @@
"visit",
"type",
"date",
- "status",
- "metadata",
- "snapshot",
]
@_prepared_statement("SELECT * FROM origin_visit WHERE origin = ? AND visit > ?")
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -148,9 +148,6 @@
visit bigint,
date timestamp,
type text,
- status ascii,
- metadata text,
- snapshot blob,
PRIMARY KEY ((origin), visit)
);
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -809,11 +809,15 @@
self._cql_runner.origin_visit_add_one(visit)
assert visit.visit is not None
all_visits.append(visit)
-
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
- self._origin_visit_status_add(visit_status)
+ self._origin_visit_status_add(
+ OriginVisitStatus(
+ origin=visit.origin,
+ visit=visit.visit,
+ date=visit.date,
+ status="created",
+ snapshot=None,
+ )
+ )
return all_visits
@@ -881,7 +885,6 @@
**visit._asdict(),
"origin": visit.origin,
"date": visit.date.replace(tzinfo=datetime.timezone.utc),
- "metadata": (json.loads(visit.metadata) if visit.metadata else None),
}
def origin_visit_get(
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -428,8 +428,7 @@
revision_get_cols = revision_add_cols + ["parents"]
def origin_visit_add(self, origin, ts, type, cur=None):
- """Add a new origin_visit for origin origin at timestamp ts with
- status 'ongoing'.
+ """Add a new origin_visit for origin origin at timestamp ts.
Args:
origin: origin concerned by the visit
@@ -477,6 +476,13 @@
+ [jsonize(visit_status.metadata)],
)
+ origin_visit_upsert_cols = [
+ "origin",
+ "visit",
+ "date",
+ "type",
+ ]
+
def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None:
# doing an extra query like this is way simpler than trying to join
# the origin id in the query below
@@ -487,23 +493,14 @@
query = """INSERT INTO origin_visit ({cols}) VALUES ({values})
ON CONFLICT ON CONSTRAINT origin_visit_pkey DO
UPDATE SET {updates}""".format(
- cols=", ".join(self.origin_visit_get_cols),
- values=", ".join("%s" for col in self.origin_visit_get_cols),
+ cols=", ".join(self.origin_visit_upsert_cols),
+ values=", ".join("%s" for col in self.origin_visit_upsert_cols),
updates=", ".join(
- "{0}=excluded.{0}".format(col) for col in self.origin_visit_get_cols
+ "{0}=excluded.{0}".format(col) for col in self.origin_visit_upsert_cols
),
)
cur.execute(
- query,
- (
- origin_id,
- ov.visit,
- ov.date,
- ov.type,
- ov.status,
- ov.metadata,
- ov.snapshot,
- ),
+ query, (origin_id, ov.visit, ov.date, ov.type),
)
origin_visit_get_cols = [
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -798,10 +798,6 @@
while len(self._origin_visits[origin_url]) < visit.visit:
self._origin_visits[origin_url].append(None)
self._origin_visits[origin_url][visit.visit - 1] = visit
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
- self._origin_visit_status_add_one(visit_status)
else:
# visit ids are in the range [1, +inf[
visit_id = len(self._origin_visits[origin_url]) + 1
@@ -809,13 +805,17 @@
self.journal_writer.origin_visit_add([visit])
self._origin_visits[origin_url].append(visit)
visit_key = (origin_url, visit.visit)
-
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
- self._origin_visit_status_add_one(visit_status)
self._objects[visit_key].append(("origin_visit", None))
assert visit.visit is not None
+ self._origin_visit_status_add_one(
+ OriginVisitStatus(
+ origin=visit.origin,
+ visit=visit.visit,
+ date=visit.date,
+ status="created",
+ snapshot=None,
+ )
+ )
all_visits.append(visit)
return all_visits
diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
--- a/swh/storage/sql/30-swh-schema.sql
+++ b/swh/storage/sql/30-swh-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(154, now(), 'Work In Progress');
+ values(155, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -280,20 +280,13 @@
origin bigint not null,
visit bigint not null,
date timestamptz not null,
- type text not null,
- -- remove those when done migrating the schema
- status origin_visit_state not null,
- metadata jsonb,
- snapshot sha1_git
+ type text not null
);
comment on column origin_visit.origin is 'Visited origin';
comment on column origin_visit.visit is 'Sequential visit number for the origin';
comment on column origin_visit.date is 'Visit timestamp';
comment on column origin_visit.type is 'Type of loader that did the visit (hg, git, ...)';
-comment on column origin_visit.status is '(Deprecated) Visit status';
-comment on column origin_visit.metadata is '(Deprecated) Optional origin visit metadata';
-comment on column origin_visit.snapshot is '(Deprecated) Optional snapshot of the origin visit. It can be partial.';
-- Crawling history of software origin visits by Software Heritage. Each
diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql
--- a/swh/storage/sql/40-swh-func.sql
+++ b/swh/storage/sql/40-swh-func.sql
@@ -592,9 +592,9 @@
from origin_visit
where origin = (select id from origin_id)
)
- insert into origin_visit (origin, date, type, visit, status)
+ insert into origin_visit (origin, date, type, visit)
values ((select id from origin_id), date, type,
- (select visit from last_known_visit) + 1, 'ongoing')
+ (select visit from last_known_visit) + 1)
returning visit;
$$;
diff --git a/swh/storage/sql/60-swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql
--- a/swh/storage/sql/60-swh-indexes.sql
+++ b/swh/storage/sql/60-swh-indexes.sql
@@ -126,7 +126,7 @@
alter table origin_visit add primary key using index origin_visit_pkey;
create index concurrently on origin_visit(date);
-create index concurrently on origin_visit(type, status, date);
+create index concurrently on origin_visit(type, date);
alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid;
alter table origin_visit validate constraint origin_visit_origin_fkey;
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -823,9 +823,13 @@
all_visits.append(visit)
# Forced to write after for the case when the visit has no id
self.journal_writer.origin_visit_add([visit])
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
+ visit_status = OriginVisitStatus(
+ origin=visit.origin,
+ visit=visit.visit,
+ date=visit.date,
+ status="created",
+ snapshot=None,
+ )
self._origin_visit_status_add(visit_status, db=db, cur=cur)
send_metric("origin_visit:add", count=nb_visits, method_name="origin_visit")
diff --git a/swh/storage/tests/test_backfill.py b/swh/storage/tests/test_backfill.py
--- a/swh/storage/tests/test_backfill.py
+++ b/swh/storage/tests/test_backfill.py
@@ -118,15 +118,12 @@
"type",
"origin",
"date",
- "snapshot",
- "status",
- "metadata",
]
assert (
query
== """
-select visit,type,origin.url as origin,date,snapshot,status,metadata
+select visit,type,origin.url as origin,date
from origin_visit
left join origin on origin_visit.origin=origin.id
where (origin_visit.origin) >= %s and (origin_visit.origin) < %s
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -3,7 +3,6 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import attr
import copy
from contextlib import contextmanager
import datetime
@@ -1445,15 +1444,7 @@
origin_url = origin["url"]
for date_visit in visits:
visit = swh_storage.origin_visit_add(
- [
- OriginVisit(
- origin=origin_url,
- date=date_visit,
- type=visit_type,
- status="ongoing",
- snapshot=None,
- )
- ]
+ [OriginVisit(origin=origin_url, date=date_visit, type=visit_type,)]
)[0]
swh_storage.origin_visit_status_add(
[
@@ -1489,15 +1480,7 @@
origin_url = origin["url"]
for date_visit in visits:
visit = swh_storage.origin_visit_add(
- [
- OriginVisit(
- origin=origin_url,
- date=date_visit,
- type=visit_type,
- status="ongoing",
- snapshot=None,
- )
- ]
+ [OriginVisit(origin=origin_url, date=date_visit, type=visit_type,)]
)[0]
swh_storage.origin_visit_status_add(
[
@@ -1660,18 +1643,10 @@
date_visit2 = round_to_milliseconds(date_visit2)
visit1 = OriginVisit(
- origin=origin1.url,
- date=date_visit,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=date_visit, type=data.type_visit1,
)
visit2 = OriginVisit(
- origin=origin1.url,
- date=date_visit2,
- type=data.type_visit2,
- status="partial",
- snapshot=None,
+ origin=origin1.url, date=date_visit2, type=data.type_visit2,
)
# add once
@@ -1681,38 +1656,44 @@
assert ov1 == origin_visit1
assert ov2 == origin_visit2
+ ovs1 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov1.visit,
+ date=date_visit,
+ status="created",
+ snapshot=None,
+ )
+ ovs2 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov2.visit,
+ date=date_visit2,
+ status="created",
+ snapshot=None,
+ )
+
actual_origin_visits = list(swh_storage.origin_visit_get(origin1.url))
expected_visits = [
- attr.evolve(visit1, visit=ov1.visit),
- attr.evolve(visit2, visit=ov2.visit),
+ {**ovs1.to_dict(), "type": ov1.type},
+ {**ovs2.to_dict(), "type": ov2.type},
]
- expected_visit_statuses = []
- for visit in expected_visits:
- visit_status = visit.to_dict()
- visit_status.pop("type")
- expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status))
-
assert len(expected_visits) == len(actual_origin_visits)
+
for visit in expected_visits:
- assert visit.to_dict() in actual_origin_visits
+ assert visit in actual_origin_visits
actual_objects = set(swh_storage.journal_writer.journal.objects)
# we write to the journal as many times as we call the endpoint
assert actual_objects == set(
[("origin", origin1)]
- + [("origin_visit", visit) for visit in expected_visits] * 2
- + [("origin_visit_status", ovs) for ovs in expected_visit_statuses] * 2
+ + [("origin_visit", visit) for visit in [ov1, ov2]] * 2
+ + [("origin_visit_status", ovs) for ovs in [ovs1, ovs2]]
)
def test_origin_visit_add_validation(self, swh_storage):
"""Unknown origin when adding visits should raise"""
visit = OriginVisit(
- origin="something-unknown",
- date=now(),
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin="something-unknown", date=now(), type=data.type_visit1,
)
with pytest.raises(StorageArgumentException, match="Unknown origin"):
swh_storage.origin_visit_add([visit])
@@ -1747,22 +1728,29 @@
ov1, ov2 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin1.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
),
OriginVisit(
- origin=origin2.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin2.url, date=data.date_visit2, type=data.type_visit2,
),
]
)
+ ovs1 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov1.visit,
+ date=data.date_visit1,
+ status="created",
+ snapshot=None,
+ )
+ ovs2 = OriginVisitStatus(
+ origin=origin2.url,
+ visit=ov2.visit,
+ date=data.date_visit2,
+ status="created",
+ snapshot=None,
+ )
+
snapshot_id = data.snapshot["id"]
date_visit_now = now()
visit_status1 = OriginVisitStatus(
@@ -1804,21 +1792,14 @@
expected_origins = [origin1, origin2]
expected_visits = [ov1, ov2]
- expected_visit_statuses = []
- for visit in expected_visits: # out of origin-visit-add calls
- visit_status = visit.to_dict()
- visit_status.pop("type")
- expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status))
-
- # out of origin-visit-status add calls
- expected_visit_statuses += [visit_status1, visit_status2]
+ expected_visit_statuses = [ovs1, ovs2, visit_status1, visit_status2]
+
expected_objects = (
[("origin", o) for o in expected_origins]
+ [("origin_visit", v) for v in expected_visits]
+ [("origin_visit_status", ovs) for ovs in expected_visit_statuses]
)
- assert len(actual_objects) == len(expected_objects)
for obj in expected_objects:
assert obj in actual_objects
@@ -1831,15 +1812,18 @@
ov1 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin1.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
),
]
)[0]
+ ovs1 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov1.visit,
+ date=data.date_visit1,
+ status="created",
+ snapshot=None,
+ )
snapshot_id = data.snapshot["id"]
date_visit_now = now()
visit_status1 = OriginVisitStatus(
@@ -1866,57 +1850,62 @@
expected_origins = [origin1]
expected_visits = [ov1]
- expected_visit_statuses = []
- for visit in expected_visits: # out of origin-visit-add calls
- visit_status = visit.to_dict()
- visit_status.pop("type")
- expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status))
+ expected_visit_statuses = [ovs1, visit_status1, visit_status1]
# write twice in the journal
- expected_visit_statuses += [visit_status1] * 2
expected_objects = (
[("origin", o) for o in expected_origins]
+ [("origin_visit", v) for v in expected_visits]
+ [("origin_visit_status", ovs) for ovs in expected_visit_statuses]
)
- assert len(actual_objects) == len(expected_objects)
for obj in expected_objects:
assert obj in actual_objects
def test_origin_visit_find_by_date(self, swh_storage):
# given
- origin_url = swh_storage.origin_add_one(data.origin)
+ origin = Origin.from_dict(data.origin)
+ swh_storage.origin_add_one(data.origin)
visit1 = OriginVisit(
- origin=origin_url,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit1,
+ )
+ visit2 = OriginVisit(
+ origin=origin.url, date=data.date_visit3, type=data.type_visit2,
+ )
+ visit3 = OriginVisit(
+ origin=origin.url, date=data.date_visit2, type=data.type_visit3,
+ )
+ ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+
+ ovs1 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
date=data.date_visit2,
- type=data.type_visit1,
status="ongoing",
snapshot=None,
)
- visit2 = OriginVisit(
- origin=origin_url,
+ ovs2 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov2.visit,
date=data.date_visit3,
- type=data.type_visit2,
status="ongoing",
snapshot=None,
)
- visit3 = OriginVisit(
- origin=origin_url,
+ ovs3 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov3.visit,
date=data.date_visit2,
- type=data.type_visit3,
status="ongoing",
snapshot=None,
)
-
- _, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+ swh_storage.origin_visit_status_add([ovs1, ovs2, ovs3])
# Simple case
- visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit3)
+ visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit3)
assert visit["visit"] == ov2.visit
# There are two visits at the same date, the latest must be returned
- visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit2)
+ visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit2)
assert visit["visit"] == ov3.visit
def test_origin_visit_find_by_date__unknown_origin(self, swh_storage):
@@ -1926,11 +1915,7 @@
origin_url = swh_storage.origin_add_one(data.origin)
origin_url2 = swh_storage.origin_add_one(data.origin2)
visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit2, type=data.type_visit2,
)
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -2031,31 +2016,20 @@
origin = Origin.from_dict(data.origin)
swh_storage.origin_add_one(origin)
visit1 = OriginVisit(
- origin=origin.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit1, type=data.type_visit1,
)
visit2 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
# Add a visit with the same date as the previous one
visit3 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
assert data.type_visit1 != data.type_visit2
assert data.date_visit1 < data.date_visit2
ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+
origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit)
origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit)
@@ -2086,26 +2060,14 @@
origin = Origin.from_dict(data.origin)
swh_storage.origin_add_one(origin)
visit1 = OriginVisit(
- origin=origin.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit1, type=data.type_visit1,
)
visit2 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
# Add a visit with the same date as the previous one
visit3 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
@@ -2135,10 +2097,14 @@
)
]
)
- assert {
+ actual_visit = swh_storage.origin_visit_get_latest(
+ origin.url, require_snapshot=True
+ )
+ assert actual_visit == {
**origin_visit1,
"snapshot": complete_snapshot.id,
- } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)
+ "status": "ongoing", # visit1 has status created now
+ }
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url)
@@ -2188,6 +2154,7 @@
assert {
**origin_visit2,
"snapshot": empty_snapshot.id,
+ "status": "ongoing",
} == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url)
@@ -2226,11 +2193,13 @@
assert {
**origin_visit3,
"snapshot": complete_snapshot.id,
+ "status": "ongoing",
} == swh_storage.origin_visit_get_latest(origin.url)
assert {
**origin_visit3,
"snapshot": complete_snapshot.id,
+ "status": "ongoing",
} == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)
def test_origin_visit_status_get_latest(self, swh_storage):
@@ -2242,18 +2211,10 @@
ov1, ov2 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin1.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
),
OriginVisit(
- origin=origin1.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit2, type=data.type_visit2,
),
]
)
@@ -2283,7 +2244,7 @@
ovs3 = OriginVisitStatus(
origin=origin1.url,
visit=ov2.visit,
- date=data.date_visit2,
+ date=data.date_visit2 + datetime.timedelta(minutes=1), # to not be ignored
status="ongoing",
snapshot=None,
)
@@ -2374,11 +2335,7 @@
ov1 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit1, type=data.type_visit1,
)
]
)[0]
@@ -2406,33 +2363,37 @@
by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit)
assert by_ov == {**data.empty_snapshot, "next_branch": None}
- data1 = {
- "origin": origin_url,
- "date": data.date_visit1,
- "visit": ov1.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data2 = {
- "origin": origin_url,
- "date": date_now,
- "visit": ov1.visit,
- "status": "full",
- "metadata": None,
- "snapshot": data.empty_snapshot["id"],
- }
+ ovs1 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": data.date_visit1,
+ "visit": ov1.visit,
+ "status": "created",
+ "snapshot": None,
+ "metadata": None,
+ }
+ )
+ ovs2 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": date_now,
+ "visit": ov1.visit,
+ "status": "full",
+ "metadata": None,
+ "snapshot": data.empty_snapshot["id"],
+ }
+ )
actual_objects = list(swh_storage.journal_writer.journal.objects)
- assert actual_objects == [
+
+ expected_objects = [
("origin", Origin.from_dict(data.origin)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data1, "type": data.type_visit1},),
- ),
- ("origin_visit_status", OriginVisitStatus.from_dict(data1)),
+ ("origin_visit", ov1),
+ ("origin_visit_status", ovs1,),
("snapshot", Snapshot.from_dict(data.empty_snapshot)),
- ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
+ ("origin_visit_status", ovs2,),
]
+ for obj in expected_objects:
+ assert obj in actual_objects
def test_snapshot_add_get_complete(self, swh_storage):
origin_url = data.origin["url"]
@@ -2613,8 +2574,8 @@
origin=origin_url,
date=data.date_visit1,
type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ # status="ongoing",
+ # snapshot=None,
)
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -2737,8 +2698,8 @@
origin=origin_url,
date=data.date_visit1,
type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ # status="ongoing",
+ # snapshot=None,
)
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
visit_id = origin_visit1.visit
@@ -2770,11 +2731,7 @@
ov1 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit1, type=data.type_visit1,
)
]
)[0]
@@ -2799,11 +2756,7 @@
ov2 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit2, type=data.type_visit2,
)
]
)[0]
@@ -2825,55 +2778,59 @@
by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov2.visit)
assert by_ov2 == {**data.snapshot, "next_branch": None}
- data1 = {
- "origin": origin_url,
- "date": data.date_visit1,
- "visit": ov1.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data2 = {
- "origin": origin_url,
- "date": date_now2,
- "visit": ov1.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": data.snapshot["id"],
- }
- data3 = {
- "origin": origin_url,
- "date": data.date_visit2,
- "visit": ov2.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data4 = {
- "origin": origin_url,
- "date": date_now4,
- "visit": ov2.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": data.snapshot["id"],
- }
+ ovs1 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": data.date_visit1,
+ "visit": ov1.visit,
+ "status": "created",
+ "metadata": None,
+ "snapshot": None,
+ }
+ )
+ ovs2 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": date_now2,
+ "visit": ov1.visit,
+ "status": "ongoing",
+ "metadata": None,
+ "snapshot": data.snapshot["id"],
+ }
+ )
+ ovs3 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": data.date_visit2,
+ "visit": ov2.visit,
+ "status": "created",
+ "metadata": None,
+ "snapshot": None,
+ }
+ )
+ ovs4 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": date_now4,
+ "visit": ov2.visit,
+ "status": "ongoing",
+ "metadata": None,
+ "snapshot": data.snapshot["id"],
+ }
+ )
actual_objects = list(swh_storage.journal_writer.journal.objects)
- assert actual_objects == [
+ expected_objects = [
("origin", Origin.from_dict(data.origin)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data1, "type": data.type_visit1}),
- ),
- ("origin_visit_status", OriginVisitStatus.from_dict(data1)),
+ ("origin_visit", ov1),
+ ("origin_visit_status", ovs1),
("snapshot", Snapshot.from_dict(data.snapshot)),
- ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
- (
- "origin_visit",
- OriginVisit.from_dict({**data3, "type": data.type_visit2}),
- ),
- ("origin_visit_status", OriginVisitStatus.from_dict(data3)),
- ("origin_visit_status", OriginVisitStatus.from_dict(data4),),
+ ("origin_visit_status", ovs2),
+ ("origin_visit", ov2),
+ ("origin_visit_status", ovs3),
+ ("origin_visit_status", ovs4),
]
+ for obj in expected_objects:
+ assert obj in actual_objects
def test_snapshot_get_random(self, swh_storage):
swh_storage.snapshot_add(
@@ -3650,7 +3607,10 @@
origin_url = "https://github.com/user1/repo1"
visit = OriginVisit(
- origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
+ origin=origin_url,
+ date=now(),
+ type="git",
+ # status="ongoing", snapshot=None
)
swh_storage.origin_visit_add([visit])
@@ -3677,7 +3637,10 @@
swh_storage.snapshot_add([data.snapshot])
origin_url = "https://github.com/user1/repo1"
visit = OriginVisit(
- origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
+ origin=origin_url,
+ date=now(),
+ type="git",
+ # status="ongoing", snapshot=None
)
visit = swh_storage.origin_visit_add([visit])[0]
swh_storage.origin_visit_status_add(
@@ -3719,8 +3682,8 @@
origin=origin_url,
date=obj["date"],
type=obj["type"],
- status="ongoing",
- snapshot=None,
+ # status="ongoing",
+ # snapshot=None,
)
swh_storage.origin_visit_add([visit])
else:

File Metadata

Mime Type
text/plain
Expires
Mon, Apr 14, 6:05 AM (21 h, 43 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221809

Event Timeline