Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7437770
D3342.id11861.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
36 KB
Subscribers
None
D3342.id11861.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,3 @@
swh.core[db,http] >= 0.0.94
-swh.model >= 0.3
+swh.model >= 0.3.4
swh.objstorage >= 0.0.40
diff --git a/sql/upgrades/155.sql b/sql/upgrades/155.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/155.sql
@@ -0,0 +1,34 @@
+-- SWH DB schema upgrade
+-- from_version: 154
+-- to_version: 155
+-- description: Drop obsolete origin-visit fields
+
+-- latest schema version
+insert into dbversion(version, release, description)
+ values(155, now(), 'Drop obsolete origin-visit fields');
+
+alter table origin_visit drop column snapshot;
+alter table origin_visit drop column metadata;
+alter table origin_visit drop column status;
+
+create or replace function swh_origin_visit_add(origin_url text, date timestamptz, type text)
+ returns bigint
+ language sql
+as $$
+ with origin_id as (
+ select id
+ from origin
+ where url = origin_url
+ ), last_known_visit as (
+ select coalesce(max(visit), 0) as visit
+ from origin_visit
+ where origin = (select id from origin_id)
+ )
+ insert into origin_visit (origin, date, type, visit)
+ values ((select id from origin_id), date, type,
+ (select visit from last_known_visit) + 1)
+ returning visit;
+$$;
+
+drop index origin_visit_type_status_date_idx;
+create index concurrently on origin_visit(type, date);
diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py
--- a/swh/storage/backfill.py
+++ b/swh/storage/backfill.py
@@ -103,15 +103,7 @@
],
"snapshot": ["id", "object_id"],
"origin": ["url"],
- "origin_visit": [
- "visit",
- "type",
- ("origin.url", "origin"),
- "date",
- "snapshot",
- "status",
- "metadata",
- ],
+ "origin_visit": ["visit", "type", ("origin.url", "origin"), "date",],
"origin_visit_status": [
"visit",
("origin.url", "origin"),
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -655,9 +655,6 @@
"visit",
"type",
"date",
- "status",
- "metadata",
- "snapshot",
]
@_prepared_statement("SELECT * FROM origin_visit WHERE origin = ? AND visit > ?")
diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py
--- a/swh/storage/cassandra/schema.py
+++ b/swh/storage/cassandra/schema.py
@@ -148,9 +148,6 @@
visit bigint,
date timestamp,
type text,
- status ascii,
- metadata text,
- snapshot blob,
PRIMARY KEY ((origin), visit)
);
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -809,11 +809,15 @@
self._cql_runner.origin_visit_add_one(visit)
assert visit.visit is not None
all_visits.append(visit)
-
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
- self._origin_visit_status_add(visit_status)
+ self._origin_visit_status_add(
+ OriginVisitStatus(
+ origin=visit.origin,
+ visit=visit.visit,
+ date=visit.date,
+ status="created",
+ snapshot=None,
+ )
+ )
return all_visits
@@ -881,7 +885,6 @@
**visit._asdict(),
"origin": visit.origin,
"date": visit.date.replace(tzinfo=datetime.timezone.utc),
- "metadata": (json.loads(visit.metadata) if visit.metadata else None),
}
def origin_visit_get(
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -428,8 +428,7 @@
revision_get_cols = revision_add_cols + ["parents"]
def origin_visit_add(self, origin, ts, type, cur=None):
- """Add a new origin_visit for origin origin at timestamp ts with
- status 'ongoing'.
+ """Add a new origin_visit for origin origin at timestamp ts.
Args:
origin: origin concerned by the visit
@@ -477,6 +476,13 @@
+ [jsonize(visit_status.metadata)],
)
+ origin_visit_upsert_cols = [
+ "origin",
+ "visit",
+ "date",
+ "type",
+ ]
+
def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None:
# doing an extra query like this is way simpler than trying to join
# the origin id in the query below
@@ -487,23 +493,14 @@
query = """INSERT INTO origin_visit ({cols}) VALUES ({values})
ON CONFLICT ON CONSTRAINT origin_visit_pkey DO
UPDATE SET {updates}""".format(
- cols=", ".join(self.origin_visit_get_cols),
- values=", ".join("%s" for col in self.origin_visit_get_cols),
+ cols=", ".join(self.origin_visit_upsert_cols),
+ values=", ".join("%s" for col in self.origin_visit_upsert_cols),
updates=", ".join(
- "{0}=excluded.{0}".format(col) for col in self.origin_visit_get_cols
+ "{0}=excluded.{0}".format(col) for col in self.origin_visit_upsert_cols
),
)
cur.execute(
- query,
- (
- origin_id,
- ov.visit,
- ov.date,
- ov.type,
- ov.status,
- ov.metadata,
- ov.snapshot,
- ),
+ query, (origin_id, ov.visit, ov.date, ov.type),
)
origin_visit_get_cols = [
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -798,10 +798,6 @@
while len(self._origin_visits[origin_url]) < visit.visit:
self._origin_visits[origin_url].append(None)
self._origin_visits[origin_url][visit.visit - 1] = visit
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
- self._origin_visit_status_add_one(visit_status)
else:
# visit ids are in the range [1, +inf[
visit_id = len(self._origin_visits[origin_url]) + 1
@@ -809,13 +805,17 @@
self.journal_writer.origin_visit_add([visit])
self._origin_visits[origin_url].append(visit)
visit_key = (origin_url, visit.visit)
-
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
- self._origin_visit_status_add_one(visit_status)
self._objects[visit_key].append(("origin_visit", None))
assert visit.visit is not None
+ self._origin_visit_status_add_one(
+ OriginVisitStatus(
+ origin=visit.origin,
+ visit=visit.visit,
+ date=visit.date,
+ status="created",
+ snapshot=None,
+ )
+ )
all_visits.append(visit)
return all_visits
diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql
--- a/swh/storage/sql/30-swh-schema.sql
+++ b/swh/storage/sql/30-swh-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(154, now(), 'Work In Progress');
+ values(155, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -280,20 +280,13 @@
origin bigint not null,
visit bigint not null,
date timestamptz not null,
- type text not null,
- -- remove those when done migrating the schema
- status origin_visit_state not null,
- metadata jsonb,
- snapshot sha1_git
+ type text not null
);
comment on column origin_visit.origin is 'Visited origin';
comment on column origin_visit.visit is 'Sequential visit number for the origin';
comment on column origin_visit.date is 'Visit timestamp';
comment on column origin_visit.type is 'Type of loader that did the visit (hg, git, ...)';
-comment on column origin_visit.status is '(Deprecated) Visit status';
-comment on column origin_visit.metadata is '(Deprecated) Optional origin visit metadata';
-comment on column origin_visit.snapshot is '(Deprecated) Optional snapshot of the origin visit. It can be partial.';
-- Crawling history of software origin visits by Software Heritage. Each
diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql
--- a/swh/storage/sql/40-swh-func.sql
+++ b/swh/storage/sql/40-swh-func.sql
@@ -592,9 +592,9 @@
from origin_visit
where origin = (select id from origin_id)
)
- insert into origin_visit (origin, date, type, visit, status)
+ insert into origin_visit (origin, date, type, visit)
values ((select id from origin_id), date, type,
- (select visit from last_known_visit) + 1, 'ongoing')
+ (select visit from last_known_visit) + 1)
returning visit;
$$;
diff --git a/swh/storage/sql/60-swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql
--- a/swh/storage/sql/60-swh-indexes.sql
+++ b/swh/storage/sql/60-swh-indexes.sql
@@ -126,7 +126,7 @@
alter table origin_visit add primary key using index origin_visit_pkey;
create index concurrently on origin_visit(date);
-create index concurrently on origin_visit(type, status, date);
+create index concurrently on origin_visit(type, date);
alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid;
alter table origin_visit validate constraint origin_visit_origin_fkey;
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -823,9 +823,13 @@
all_visits.append(visit)
# Forced to write after for the case when the visit has no id
self.journal_writer.origin_visit_add([visit])
- visit_status_dict = visit.to_dict()
- visit_status_dict.pop("type")
- visit_status = OriginVisitStatus.from_dict(visit_status_dict)
+ visit_status = OriginVisitStatus(
+ origin=visit.origin,
+ visit=visit.visit,
+ date=visit.date,
+ status="created",
+ snapshot=None,
+ )
self._origin_visit_status_add(visit_status, db=db, cur=cur)
send_metric("origin_visit:add", count=nb_visits, method_name="origin_visit")
diff --git a/swh/storage/tests/test_backfill.py b/swh/storage/tests/test_backfill.py
--- a/swh/storage/tests/test_backfill.py
+++ b/swh/storage/tests/test_backfill.py
@@ -118,15 +118,12 @@
"type",
"origin",
"date",
- "snapshot",
- "status",
- "metadata",
]
assert (
query
== """
-select visit,type,origin.url as origin,date,snapshot,status,metadata
+select visit,type,origin.url as origin,date
from origin_visit
left join origin on origin_visit.origin=origin.id
where (origin_visit.origin) >= %s and (origin_visit.origin) < %s
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -3,7 +3,6 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import attr
import copy
from contextlib import contextmanager
import datetime
@@ -1445,15 +1444,7 @@
origin_url = origin["url"]
for date_visit in visits:
visit = swh_storage.origin_visit_add(
- [
- OriginVisit(
- origin=origin_url,
- date=date_visit,
- type=visit_type,
- status="ongoing",
- snapshot=None,
- )
- ]
+ [OriginVisit(origin=origin_url, date=date_visit, type=visit_type,)]
)[0]
swh_storage.origin_visit_status_add(
[
@@ -1489,15 +1480,7 @@
origin_url = origin["url"]
for date_visit in visits:
visit = swh_storage.origin_visit_add(
- [
- OriginVisit(
- origin=origin_url,
- date=date_visit,
- type=visit_type,
- status="ongoing",
- snapshot=None,
- )
- ]
+ [OriginVisit(origin=origin_url, date=date_visit, type=visit_type,)]
)[0]
swh_storage.origin_visit_status_add(
[
@@ -1660,18 +1643,10 @@
date_visit2 = round_to_milliseconds(date_visit2)
visit1 = OriginVisit(
- origin=origin1.url,
- date=date_visit,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=date_visit, type=data.type_visit1,
)
visit2 = OriginVisit(
- origin=origin1.url,
- date=date_visit2,
- type=data.type_visit2,
- status="partial",
- snapshot=None,
+ origin=origin1.url, date=date_visit2, type=data.type_visit2,
)
# add once
@@ -1681,38 +1656,44 @@
assert ov1 == origin_visit1
assert ov2 == origin_visit2
+ ovs1 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov1.visit,
+ date=date_visit,
+ status="created",
+ snapshot=None,
+ )
+ ovs2 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov2.visit,
+ date=date_visit2,
+ status="created",
+ snapshot=None,
+ )
+
actual_origin_visits = list(swh_storage.origin_visit_get(origin1.url))
expected_visits = [
- attr.evolve(visit1, visit=ov1.visit),
- attr.evolve(visit2, visit=ov2.visit),
+ {**ovs1.to_dict(), "type": ov1.type},
+ {**ovs2.to_dict(), "type": ov2.type},
]
- expected_visit_statuses = []
- for visit in expected_visits:
- visit_status = visit.to_dict()
- visit_status.pop("type")
- expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status))
-
assert len(expected_visits) == len(actual_origin_visits)
+
for visit in expected_visits:
- assert visit.to_dict() in actual_origin_visits
+ assert visit in actual_origin_visits
actual_objects = set(swh_storage.journal_writer.journal.objects)
# we write to the journal as many times as we call the endpoint
assert actual_objects == set(
[("origin", origin1)]
- + [("origin_visit", visit) for visit in expected_visits] * 2
- + [("origin_visit_status", ovs) for ovs in expected_visit_statuses] * 2
+ + [("origin_visit", visit) for visit in [ov1, ov2]] * 2
+ + [("origin_visit_status", ovs) for ovs in [ovs1, ovs2]]
)
def test_origin_visit_add_validation(self, swh_storage):
"""Unknown origin when adding visits should raise"""
visit = OriginVisit(
- origin="something-unknown",
- date=now(),
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin="something-unknown", date=now(), type=data.type_visit1,
)
with pytest.raises(StorageArgumentException, match="Unknown origin"):
swh_storage.origin_visit_add([visit])
@@ -1747,22 +1728,29 @@
ov1, ov2 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin1.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
),
OriginVisit(
- origin=origin2.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin2.url, date=data.date_visit2, type=data.type_visit2,
),
]
)
+ ovs1 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov1.visit,
+ date=data.date_visit1,
+ status="created",
+ snapshot=None,
+ )
+ ovs2 = OriginVisitStatus(
+ origin=origin2.url,
+ visit=ov2.visit,
+ date=data.date_visit2,
+ status="created",
+ snapshot=None,
+ )
+
snapshot_id = data.snapshot["id"]
date_visit_now = now()
visit_status1 = OriginVisitStatus(
@@ -1804,21 +1792,14 @@
expected_origins = [origin1, origin2]
expected_visits = [ov1, ov2]
- expected_visit_statuses = []
- for visit in expected_visits: # out of origin-visit-add calls
- visit_status = visit.to_dict()
- visit_status.pop("type")
- expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status))
-
- # out of origin-visit-status add calls
- expected_visit_statuses += [visit_status1, visit_status2]
+ expected_visit_statuses = [ovs1, ovs2, visit_status1, visit_status2]
+
expected_objects = (
[("origin", o) for o in expected_origins]
+ [("origin_visit", v) for v in expected_visits]
+ [("origin_visit_status", ovs) for ovs in expected_visit_statuses]
)
- assert len(actual_objects) == len(expected_objects)
for obj in expected_objects:
assert obj in actual_objects
@@ -1831,15 +1812,18 @@
ov1 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin1.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
),
]
)[0]
+ ovs1 = OriginVisitStatus(
+ origin=origin1.url,
+ visit=ov1.visit,
+ date=data.date_visit1,
+ status="created",
+ snapshot=None,
+ )
snapshot_id = data.snapshot["id"]
date_visit_now = now()
visit_status1 = OriginVisitStatus(
@@ -1866,57 +1850,62 @@
expected_origins = [origin1]
expected_visits = [ov1]
- expected_visit_statuses = []
- for visit in expected_visits: # out of origin-visit-add calls
- visit_status = visit.to_dict()
- visit_status.pop("type")
- expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status))
+ expected_visit_statuses = [ovs1, visit_status1, visit_status1]
# write twice in the journal
- expected_visit_statuses += [visit_status1] * 2
expected_objects = (
[("origin", o) for o in expected_origins]
+ [("origin_visit", v) for v in expected_visits]
+ [("origin_visit_status", ovs) for ovs in expected_visit_statuses]
)
- assert len(actual_objects) == len(expected_objects)
for obj in expected_objects:
assert obj in actual_objects
def test_origin_visit_find_by_date(self, swh_storage):
# given
- origin_url = swh_storage.origin_add_one(data.origin)
+ origin = Origin.from_dict(data.origin)
+ swh_storage.origin_add_one(data.origin)
visit1 = OriginVisit(
- origin=origin_url,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit1,
+ )
+ visit2 = OriginVisit(
+ origin=origin.url, date=data.date_visit3, type=data.type_visit2,
+ )
+ visit3 = OriginVisit(
+ origin=origin.url, date=data.date_visit2, type=data.type_visit3,
+ )
+ ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+
+ ovs1 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
date=data.date_visit2,
- type=data.type_visit1,
status="ongoing",
snapshot=None,
)
- visit2 = OriginVisit(
- origin=origin_url,
+ ovs2 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov2.visit,
date=data.date_visit3,
- type=data.type_visit2,
status="ongoing",
snapshot=None,
)
- visit3 = OriginVisit(
- origin=origin_url,
+ ovs3 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov3.visit,
date=data.date_visit2,
- type=data.type_visit3,
status="ongoing",
snapshot=None,
)
-
- _, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+ swh_storage.origin_visit_status_add([ovs1, ovs2, ovs3])
# Simple case
- visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit3)
+ visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit3)
assert visit["visit"] == ov2.visit
# There are two visits at the same date, the latest must be returned
- visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit2)
+ visit = swh_storage.origin_visit_find_by_date(origin.url, data.date_visit2)
assert visit["visit"] == ov3.visit
def test_origin_visit_find_by_date__unknown_origin(self, swh_storage):
@@ -1926,11 +1915,7 @@
origin_url = swh_storage.origin_add_one(data.origin)
origin_url2 = swh_storage.origin_add_one(data.origin2)
visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit2, type=data.type_visit2,
)
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -2031,31 +2016,20 @@
origin = Origin.from_dict(data.origin)
swh_storage.origin_add_one(origin)
visit1 = OriginVisit(
- origin=origin.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit1, type=data.type_visit1,
)
visit2 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
# Add a visit with the same date as the previous one
visit3 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
assert data.type_visit1 != data.type_visit2
assert data.date_visit1 < data.date_visit2
ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+
origin_visit1 = swh_storage.origin_visit_get_by(origin.url, ov1.visit)
origin_visit3 = swh_storage.origin_visit_get_by(origin.url, ov3.visit)
@@ -2086,26 +2060,14 @@
origin = Origin.from_dict(data.origin)
swh_storage.origin_add_one(origin)
visit1 = OriginVisit(
- origin=origin.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit1, type=data.type_visit1,
)
visit2 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
# Add a visit with the same date as the previous one
visit3 = OriginVisit(
- origin=origin.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin.url, date=data.date_visit2, type=data.type_visit2,
)
ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
@@ -2135,10 +2097,14 @@
)
]
)
- assert {
+ actual_visit = swh_storage.origin_visit_get_latest(
+ origin.url, require_snapshot=True
+ )
+ assert actual_visit == {
**origin_visit1,
"snapshot": complete_snapshot.id,
- } == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)
+ "status": "ongoing", # visit1 has status created now
+ }
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url)
@@ -2188,6 +2154,7 @@
assert {
**origin_visit2,
"snapshot": empty_snapshot.id,
+ "status": "ongoing",
} == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url)
@@ -2226,11 +2193,13 @@
assert {
**origin_visit3,
"snapshot": complete_snapshot.id,
+ "status": "ongoing",
} == swh_storage.origin_visit_get_latest(origin.url)
assert {
**origin_visit3,
"snapshot": complete_snapshot.id,
+ "status": "ongoing",
} == swh_storage.origin_visit_get_latest(origin.url, require_snapshot=True)
def test_origin_visit_status_get_latest(self, swh_storage):
@@ -2242,18 +2211,10 @@
ov1, ov2 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin1.url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit1, type=data.type_visit1,
),
OriginVisit(
- origin=origin1.url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin1.url, date=data.date_visit2, type=data.type_visit2,
),
]
)
@@ -2283,7 +2244,7 @@
ovs3 = OriginVisitStatus(
origin=origin1.url,
visit=ov2.visit,
- date=data.date_visit2,
        date=data.date_visit2 + datetime.timedelta(minutes=1),  # so it is not ignored
status="ongoing",
snapshot=None,
)
@@ -2374,11 +2335,7 @@
ov1 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit1, type=data.type_visit1,
)
]
)[0]
@@ -2406,33 +2363,37 @@
by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit)
assert by_ov == {**data.empty_snapshot, "next_branch": None}
- data1 = {
- "origin": origin_url,
- "date": data.date_visit1,
- "visit": ov1.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data2 = {
- "origin": origin_url,
- "date": date_now,
- "visit": ov1.visit,
- "status": "full",
- "metadata": None,
- "snapshot": data.empty_snapshot["id"],
- }
+ ovs1 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": data.date_visit1,
+ "visit": ov1.visit,
+ "status": "created",
+ "snapshot": None,
+ "metadata": None,
+ }
+ )
+ ovs2 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": date_now,
+ "visit": ov1.visit,
+ "status": "full",
+ "metadata": None,
+ "snapshot": data.empty_snapshot["id"],
+ }
+ )
actual_objects = list(swh_storage.journal_writer.journal.objects)
- assert actual_objects == [
+
+ expected_objects = [
("origin", Origin.from_dict(data.origin)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data1, "type": data.type_visit1},),
- ),
- ("origin_visit_status", OriginVisitStatus.from_dict(data1)),
+ ("origin_visit", ov1),
+ ("origin_visit_status", ovs1,),
("snapshot", Snapshot.from_dict(data.empty_snapshot)),
- ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
+ ("origin_visit_status", ovs2,),
]
+ for obj in expected_objects:
+ assert obj in actual_objects
def test_snapshot_add_get_complete(self, swh_storage):
origin_url = data.origin["url"]
@@ -2613,8 +2574,8 @@
origin=origin_url,
date=data.date_visit1,
type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ # status="ongoing",
+ # snapshot=None,
)
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
@@ -2737,8 +2698,8 @@
origin=origin_url,
date=data.date_visit1,
type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ # status="ongoing",
+ # snapshot=None,
)
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
visit_id = origin_visit1.visit
@@ -2770,11 +2731,7 @@
ov1 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit1, type=data.type_visit1,
)
]
)[0]
@@ -2799,11 +2756,7 @@
ov2 = swh_storage.origin_visit_add(
[
OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
+ origin=origin_url, date=data.date_visit2, type=data.type_visit2,
)
]
)[0]
@@ -2825,55 +2778,59 @@
by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov2.visit)
assert by_ov2 == {**data.snapshot, "next_branch": None}
- data1 = {
- "origin": origin_url,
- "date": data.date_visit1,
- "visit": ov1.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data2 = {
- "origin": origin_url,
- "date": date_now2,
- "visit": ov1.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": data.snapshot["id"],
- }
- data3 = {
- "origin": origin_url,
- "date": data.date_visit2,
- "visit": ov2.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data4 = {
- "origin": origin_url,
- "date": date_now4,
- "visit": ov2.visit,
- "status": "ongoing",
- "metadata": None,
- "snapshot": data.snapshot["id"],
- }
+ ovs1 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": data.date_visit1,
+ "visit": ov1.visit,
+ "status": "created",
+ "metadata": None,
+ "snapshot": None,
+ }
+ )
+ ovs2 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": date_now2,
+ "visit": ov1.visit,
+ "status": "ongoing",
+ "metadata": None,
+ "snapshot": data.snapshot["id"],
+ }
+ )
+ ovs3 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": data.date_visit2,
+ "visit": ov2.visit,
+ "status": "created",
+ "metadata": None,
+ "snapshot": None,
+ }
+ )
+ ovs4 = OriginVisitStatus.from_dict(
+ {
+ "origin": origin_url,
+ "date": date_now4,
+ "visit": ov2.visit,
+ "status": "ongoing",
+ "metadata": None,
+ "snapshot": data.snapshot["id"],
+ }
+ )
actual_objects = list(swh_storage.journal_writer.journal.objects)
- assert actual_objects == [
+ expected_objects = [
("origin", Origin.from_dict(data.origin)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data1, "type": data.type_visit1}),
- ),
- ("origin_visit_status", OriginVisitStatus.from_dict(data1)),
+ ("origin_visit", ov1),
+ ("origin_visit_status", ovs1),
("snapshot", Snapshot.from_dict(data.snapshot)),
- ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
- (
- "origin_visit",
- OriginVisit.from_dict({**data3, "type": data.type_visit2}),
- ),
- ("origin_visit_status", OriginVisitStatus.from_dict(data3)),
- ("origin_visit_status", OriginVisitStatus.from_dict(data4),),
+ ("origin_visit_status", ovs2),
+ ("origin_visit", ov2),
+ ("origin_visit_status", ovs3),
+ ("origin_visit_status", ovs4),
]
+ for obj in expected_objects:
+ assert obj in actual_objects
def test_snapshot_get_random(self, swh_storage):
swh_storage.snapshot_add(
@@ -3650,7 +3607,10 @@
origin_url = "https://github.com/user1/repo1"
visit = OriginVisit(
- origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
+ origin=origin_url,
+ date=now(),
+ type="git",
+ # status="ongoing", snapshot=None
)
swh_storage.origin_visit_add([visit])
@@ -3677,7 +3637,10 @@
swh_storage.snapshot_add([data.snapshot])
origin_url = "https://github.com/user1/repo1"
visit = OriginVisit(
- origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
+ origin=origin_url,
+ date=now(),
+ type="git",
+ # status="ongoing", snapshot=None
)
visit = swh_storage.origin_visit_add([visit])[0]
swh_storage.origin_visit_status_add(
@@ -3719,8 +3682,8 @@
origin=origin_url,
date=obj["date"],
type=obj["type"],
- status="ongoing",
- snapshot=None,
+ # status="ongoing",
+ # snapshot=None,
)
swh_storage.origin_visit_add([visit])
else:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Apr 14, 6:05 AM (21 h, 43 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221809
Attached To
D3342: storage*: Drop obsolete fields from origin_visit
Event Timeline
Log In to Comment