Page MenuHomeSoftware Heritage

D3276.diff
No OneTemporary

D3276.diff

diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -838,49 +838,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add(visit_status)
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- ):
- origin_url = origin # TODO: rename the argument
-
- # Get the existing data of the visit
- visit_ = self.origin_visit_get_by(origin_url, visit_id)
- if not visit_:
- raise StorageArgumentException("This origin visit does not exist.")
- with convert_validation_exceptions():
- visit = OriginVisit.from_dict(visit_)
-
- updates: Dict[str, Any] = {"status": status}
- if metadata and metadata != visit.metadata:
- updates["metadata"] = metadata
- if snapshot and snapshot != visit.snapshot:
- updates["snapshot"] = snapshot
-
- with convert_validation_exceptions():
- visit = attr.evolve(visit, **updates)
-
- self.journal_writer.origin_visit_update([visit])
-
- last_visit_update = self._origin_visit_get_updated(visit.origin, visit.visit)
- assert last_visit_update is not None
-
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date or now(),
- status=status,
- snapshot=snapshot or last_visit_update["snapshot"],
- metadata=metadata or last_visit_update["metadata"],
- )
- self._origin_visit_status_add(visit_status)
-
def _origin_visit_merge(
self, visit: Dict[str, Any], visit_status: Dict[str, Any]
) -> Dict[str, Any]:
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -477,31 +477,6 @@
+ [jsonize(visit_status.metadata)],
)
- def origin_visit_update(self, origin_id, visit_id, updates, cur=None):
- """Update origin_visit's status."""
- cur = self._cursor(cur)
- update_cols = []
- values = []
- where = ["origin.id = origin_visit.origin", "origin.url=%s", "visit=%s"]
- where_values = [origin_id, visit_id]
- if "status" in updates:
- update_cols.append("status=%s")
- values.append(updates.pop("status"))
- if "metadata" in updates:
- update_cols.append("metadata=%s")
- values.append(jsonize(updates.pop("metadata")))
- if "snapshot" in updates:
- update_cols.append("snapshot=%s")
- values.append(updates.pop("snapshot"))
- assert not updates, "Unknown fields: %r" % updates
- query = """UPDATE origin_visit
- SET {update_cols}
- FROM origin
- WHERE {where}""".format(
- **{"update_cols": ", ".join(update_cols), "where": " AND ".join(where)}
- )
- cur.execute(query, (*values, *where_values))
-
def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None:
# doing an extra query like this is way simpler than trying to join
# the origin id in the query below
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -853,56 +853,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add_one(visit_status)
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- ):
- origin_url = self._get_origin_url(origin)
- if origin_url is None:
- raise StorageArgumentException("Unknown origin.")
-
- try:
- visit = self._origin_visits[origin_url][visit_id - 1]
- except IndexError:
- raise StorageArgumentException("Unknown visit_id for this origin") from None
-
- updates: Dict[str, Any] = {
- "status": status,
- }
- if metadata and metadata != visit.metadata:
- updates["metadata"] = metadata
- if snapshot and snapshot != visit.snapshot:
- updates["snapshot"] = snapshot
-
- if updates:
- with convert_validation_exceptions():
- updated_visit = OriginVisit.from_dict({**visit.to_dict(), **updates})
- self.journal_writer.origin_visit_update([updated_visit])
-
- self._origin_visits[origin_url][visit_id - 1] = updated_visit
-
- # Retrieve the previous visit status
- assert visit.visit is not None
-
- last_visit_status = self._origin_visit_get_updated(origin, visit_id)
- assert last_visit_status is not None
-
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date or now(),
- status=status,
- snapshot=snapshot or last_visit_status.snapshot,
- metadata=metadata or last_visit_status.metadata,
- )
- self._origin_visit_status_add_one(visit_status)
-
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None:
for visit in visits:
if visit.visit is None:
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -810,33 +810,6 @@
"""
...
- @remote_api_endpoint("origin/visit/update")
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- ):
- """Update an origin_visit's status.
-
- Args:
- origin (str): visited origin's URL
- visit_id: Visit's id
- status: Visit's new status
- metadata: Data associated to the visit
- snapshot (sha1_git): identifier of the snapshot to add to
- the visit
- date: Update date
-
- Returns:
- None
-
- """
- ...
-
@remote_api_endpoint("origin/visit/upsert")
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None:
"""Add a origin_visits with a specific id and with all its data.
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -878,64 +878,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add(visit_status, db, cur)
- @timed
- @db_transaction()
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- db=None,
- cur=None,
- ):
- if not isinstance(origin, str):
- raise StorageArgumentException(
- "origin must be a string, not %r" % (origin,)
- )
- origin_url = origin
- visit = db.origin_visit_get(origin_url, visit_id, cur=cur)
-
- if not visit:
- raise StorageArgumentException("Invalid visit_id for this origin.")
-
- visit = dict(zip(db.origin_visit_get_cols, visit))
-
- updates: Dict[str, Any] = {
- "status": status,
- }
- if metadata and metadata != visit["metadata"]:
- updates["metadata"] = metadata
- if snapshot and snapshot != visit["snapshot"]:
- updates["snapshot"] = snapshot
-
- if updates:
- with convert_validation_exceptions():
- updated_visit = OriginVisit.from_dict({**visit, **updates})
- self.journal_writer.origin_visit_update([updated_visit])
-
- # Write updates to origin visit (backward compatibility)
- db.origin_visit_update(origin, visit_id, updates)
-
- # Add new origin visit status
- last_visit_status = self._origin_visit_get_updated(
- origin, visit_id, db=db, cur=cur
- )
- assert last_visit_status is not None
-
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date or now(),
- status=status,
- snapshot=snapshot or last_visit_status["snapshot"],
- metadata=metadata or last_visit_status["metadata"],
- )
- self._origin_visit_status_add(visit_status, db=db, cur=cur)
-
def _origin_visit_get_updated(
self, origin: str, visit_id: int, db, cur
) -> Optional[Dict[str, Any]]:
diff --git a/swh/storage/tests/test_kafka_writer.py b/swh/storage/tests/test_kafka_writer.py
--- a/swh/storage/tests/test_kafka_writer.py
+++ b/swh/storage/tests/test_kafka_writer.py
@@ -53,14 +53,8 @@
for obj in objs:
assert isinstance(obj, OriginVisit)
storage.origin_add_one(Origin(url=obj.origin))
- visit = method([obj])[0]
+ method([obj])
expected_messages += 1 + 1 # 1 visit + 1 visit status
-
- obj_d = obj.to_dict()
- for k in ("visit", "origin", "date", "type"):
- del obj_d[k]
- storage.origin_visit_update(obj.origin, visit.visit, **obj_d)
- expected_messages += 1 + 1 # 1 visit update + 1 visit status
else:
assert False, obj_type
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -679,87 +679,6 @@
assert mock_memory.call_count == 1
-def test_retrying_proxy_swh_storage_origin_visit_update(swh_storage, sample_data):
- """Standard origin_visit_update works as before
-
- """
- sample_origin = sample_data["origin"][0]
- origin_url = swh_storage.origin_add_one(sample_origin)
- visit = OriginVisit(
- origin=origin_url, date=date_visit1, type="hg", status="ongoing", snapshot=None
- )
-
- origin_visit = swh_storage.origin_visit_add([visit])[0]
-
- ov = next(swh_storage.origin_visit_get(origin_url))
- assert ov["origin"] == origin_url
- assert ov["visit"] == origin_visit.visit
- assert ov["status"] == "ongoing"
- assert ov["snapshot"] is None
- assert ov["metadata"] is None
-
- swh_storage.origin_visit_update(origin_url, origin_visit.visit, status="full")
-
- ov = next(swh_storage.origin_visit_get(origin_url))
- assert ov["origin"] == origin_url
- assert ov["visit"] == origin_visit.visit
- assert ov["status"] == "full"
- assert ov["snapshot"] is None
- assert ov["metadata"] is None
-
-
-def test_retrying_proxy_swh_storage_origin_visit_update_retry(
- monkeypatch_sleep, swh_storage, sample_data, mocker, fake_hash_collision
-):
- """Multiple retries for hash collision and psycopg2 error but finally ok
-
- """
- sample_origin = sample_data["origin"][1]
- origin_url = sample_origin["url"]
-
- mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_visit_update"
- )
- mock_memory.side_effect = [
- # first try goes ko
- fake_hash_collision,
- # second try goes ko
- psycopg2.IntegrityError("origin already inserted"),
- # ok then!
- {"origin": origin_url, "visit": 1},
- ]
-
- visit_id = 1
- swh_storage.origin_visit_update(origin_url, visit_id, status="full")
-
- mock_memory.assert_has_calls(
- [
- call(origin_url, visit_id, metadata=None, snapshot=None, status="full"),
- call(origin_url, visit_id, metadata=None, snapshot=None, status="full"),
- call(origin_url, visit_id, metadata=None, snapshot=None, status="full"),
- ]
- )
-
-
-def test_retrying_proxy_swh_storage_origin_visit_update_failure(
- swh_storage, sample_data, mocker
-):
- """Unfiltered errors are raising without retry
-
- """
- mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_visit_update"
- )
- mock_memory.side_effect = StorageArgumentException("Refuse to add origin always!")
- origin_url = sample_data["origin"][0]["url"]
- visit_id = 9
-
- with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.origin_visit_update(origin_url, visit_id, "partial")
-
- assert mock_memory.call_count == 1
-
-
def test_retrying_proxy_storage_directory_add(swh_storage, sample_data):
"""Standard directory_add works as before
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1427,8 +1427,16 @@
)
]
)[0]
- swh_storage.origin_visit_update(
- origin_url, visit_id=visit.visit, status="full"
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=visit.visit,
+ date=now(),
+ status="full",
+ snapshot=None,
+ )
+ ]
)
swh_storage.refresh_stat_counters()
@@ -1463,7 +1471,17 @@
)
]
)[0]
- swh_storage.origin_visit_update(origin_url, visit.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=visit.visit,
+ date=now(),
+ status="full",
+ snapshot=None,
+ )
+ ]
+ )
random_origin_visit = swh_storage.origin_visit_get_random(visit_type)
assert random_origin_visit is None
@@ -1842,194 +1860,6 @@
for obj in expected_objects:
assert obj in actual_objects
- def test_origin_visit_update(self, swh_storage):
- # given
- origin_url = swh_storage.origin_add_one(data.origin)
- origin_url2 = swh_storage.origin_add_one(data.origin2)
- # date_visit < date_visit2
- date_visit = data.date_visit1
- date_visit2 = data.date_visit2
-
- # Round to milliseconds before insertion, so equality doesn't fail
- # after a round-trip through a DB (eg. Cassandra)
- date_visit = date_visit.replace(microsecond=round(date_visit.microsecond, -3))
- date_visit2 = date_visit2.replace(
- microsecond=round(date_visit2.microsecond, -3)
- )
-
- visit1 = OriginVisit(
- origin=origin_url,
- date=date_visit,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- visit2 = OriginVisit(
- origin=origin_url,
- date=date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- visit3 = OriginVisit(
- origin=origin_url2,
- date=date_visit2,
- type=data.type_visit3, # noqa
- status="ongoing",
- snapshot=None,
- )
- ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
-
- # when
- visit1_metadata = {
- "contents": 42,
- "directories": 22,
- }
- swh_storage.origin_visit_update(
- origin_url, ov1.visit, status="full", metadata=visit1_metadata
- )
- swh_storage.origin_visit_update(origin_url2, ov3.visit, status="partial")
-
- # then
- actual_origin_visits = list(swh_storage.origin_visit_get(origin_url))
- expected_visits = [
- {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "status": "full",
- "metadata": visit1_metadata,
- "snapshot": None,
- },
- {
- "origin": origin_url,
- "date": date_visit2,
- "visit": ov2.visit,
- "type": data.type_visit2,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- },
- ]
- for visit in expected_visits:
- assert visit in actual_origin_visits
-
- actual_origin_visits_bis = list(
- swh_storage.origin_visit_get(origin_url, limit=1)
- )
- assert actual_origin_visits_bis == [
- {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "status": "full",
- "metadata": visit1_metadata,
- "snapshot": None,
- }
- ]
-
- actual_origin_visits_ter = list(
- swh_storage.origin_visit_get(origin_url, last_visit=ov1.visit)
- )
- assert actual_origin_visits_ter == [
- {
- "origin": origin_url,
- "date": date_visit2,
- "visit": ov2.visit,
- "type": data.type_visit2,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- ]
-
- actual_origin_visits2 = list(swh_storage.origin_visit_get(origin_url2))
- assert actual_origin_visits2 == [
- {
- "origin": origin_url2,
- "date": date_visit2,
- "visit": ov3.visit,
- "type": data.type_visit3,
- "status": "partial",
- "metadata": None,
- "snapshot": None,
- }
- ]
-
- data1 = {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data2 = {
- "origin": origin_url,
- "date": date_visit2,
- "visit": ov2.visit,
- "type": data.type_visit2,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data3 = {
- "origin": origin_url2,
- "date": date_visit2,
- "visit": ov3.visit,
- "type": data.type_visit3,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data4 = {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "metadata": visit1_metadata,
- "status": "full",
- "snapshot": None,
- }
- data5 = {
- "origin": origin_url2,
- "date": date_visit2,
- "visit": ov3.visit,
- "type": data.type_visit3,
- "status": "partial",
- "metadata": None,
- "snapshot": None,
- }
- objects = list(swh_storage.journal_writer.journal.objects)
- assert ("origin", Origin.from_dict(data.origin)) in objects
- assert ("origin", Origin.from_dict(data.origin2)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data1)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data2)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data3)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data4)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data5)) in objects
-
- def test_origin_visit_update_validation(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- visit = swh_storage.origin_visit_add([visit])[0]
- with pytest.raises(
- (StorageArgumentException, ValueError), match="status"
- ) as cm:
- swh_storage.origin_visit_update(origin_url, visit.visit, status="foobar")
-
- if type(cm.value) == psycopg2.DataError:
- assert cm.value.pgcode == psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION
-
def test_origin_visit_find_by_date(self, swh_storage):
# given
origin_url = swh_storage.origin_add_one(data.origin)
@@ -2068,36 +1898,6 @@
def test_origin_visit_find_by_date__unknown_origin(self, swh_storage):
swh_storage.origin_visit_find_by_date("foo", data.date_visit2)
- def test_origin_visit_update_missing_snapshot(self, swh_storage):
- # given
- origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- origin_visit = swh_storage.origin_visit_add([visit])[0]
-
- # when
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
- ) # snapshot does not exist yet
-
- # then
- actual_origin_visit = swh_storage.origin_visit_get_by(
- origin_url, origin_visit.visit
- )
- assert actual_origin_visit["snapshot"] == data.snapshot["id"]
-
- # when
- swh_storage.snapshot_add([data.snapshot])
- assert actual_origin_visit["snapshot"] == data.snapshot["id"]
-
def test_origin_visit_get_by(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
origin_url2 = swh_storage.origin_add_one(data.origin2)
@@ -2111,11 +1911,16 @@
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
# Add some other {origin, visit} entries
@@ -2141,8 +1946,17 @@
"directories": 22,
}
- swh_storage.origin_visit_update(
- origin_url, origin_visit1.visit, status="full", metadata=visit1_metadata
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.snapshot["id"],
+ metadata=visit1_metadata,
+ )
+ ]
)
expected_origin_visit = origin_visit1.to_dict()
@@ -2458,11 +2272,16 @@
# Add snapshot to visit1; require_snapshot=True makes it return
# visit1 and require_snapshot=False still returns visit2
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**origin_visit1,
@@ -2479,7 +2298,18 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
+ )
+
assert {
**origin_visit1,
"snapshot": data.complete_snapshot["id"],
@@ -2490,8 +2320,17 @@
# Add snapshot to visit2 and check that the new snapshot is returned
swh_storage.snapshot_add([data.empty_snapshot])
- swh_storage.origin_visit_update(
- origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"]
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.empty_snapshot["id"],
+ )
+ ]
)
assert {
**origin_visit2,
@@ -2509,11 +2348,17 @@
# Add snapshot to visit3 (same date as visit2)
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov3.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov3.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**origin_visit1,
@@ -2559,48 +2404,54 @@
def test_snapshot_add_get_empty(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- origin_visit1 = swh_storage.origin_visit_add([visit])[0]
- visit_id = origin_visit1.visit
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )[0]
actual_result = swh_storage.snapshot_add([data.empty_snapshot])
assert actual_result == {"snapshot:add": 1}
date_now = now()
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.empty_snapshot["id"],
- date=date_now,
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=date_now,
+ status="full",
+ snapshot=data.empty_snapshot["id"],
+ )
+ ]
)
by_id = swh_storage.snapshot_get(data.empty_snapshot["id"])
assert by_id == {**data.empty_snapshot, "next_branch": None}
- by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id)
+ by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit)
assert by_ov == {**data.empty_snapshot, "next_branch": None}
data1 = {
"origin": origin_url,
"date": data.date_visit1,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"status": "ongoing",
"metadata": None,
"snapshot": None,
}
data2 = {
"origin": origin_url,
- "date": data.date_visit1,
- "visit": origin_visit1.visit,
- "status": "ongoing",
+ "date": date_now,
+ "visit": ov1.visit,
+ "status": "full",
"metadata": None,
"snapshot": data.empty_snapshot["id"],
}
@@ -2613,14 +2464,7 @@
),
("origin_visit_status", OriginVisitStatus.from_dict(data1)),
("snapshot", Snapshot.from_dict(data.empty_snapshot)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data2, "type": data.type_visit1,}),
- ),
- (
- "origin_visit_status",
- OriginVisitStatus.from_dict({**data2, "date": date_now}),
- ),
+ ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
]
def test_snapshot_add_get_complete(self, swh_storage):
@@ -2637,11 +2481,16 @@
visit_id = origin_visit1.visit
actual_result = swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert actual_result == {"snapshot:add": 1}
@@ -2803,11 +2652,16 @@
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
snp_id = data.complete_snapshot["id"]
@@ -2923,11 +2777,16 @@
visit_id = origin_visit1.visit
swh_storage.snapshot_add([data.snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
by_id = swh_storage.snapshot_get(data.snapshot["id"])
@@ -2939,82 +2798,78 @@
origin_visit_info = swh_storage.origin_visit_get_by(origin_url, visit_id)
assert origin_visit_info["snapshot"] == data.snapshot["id"]
- def test_snapshot_add_nonexistent_visit(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin)
- # unknown visit
- visit_id = 54164461156
- swh_storage.journal_writer.journal.objects[:] = []
-
- swh_storage.snapshot_add([data.snapshot])
-
- with pytest.raises(StorageArgumentException):
- swh_storage.origin_visit_update(
- origin_url, visit_id, status="ongoing", snapshot=data.snapshot["id"]
- )
-
- assert list(swh_storage.journal_writer.journal.objects) == [
- ("snapshot", Snapshot.from_dict(data.snapshot))
- ]
-
def test_snapshot_add_twice__by_origin_visit(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- origin_visit1 = swh_storage.origin_visit_add([visit])[0]
- visit1_id = origin_visit1.visit
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )[0]
swh_storage.snapshot_add([data.snapshot])
date_now2 = now()
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
- date=date_now2,
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=date_now2,
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
- by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit1_id)
+ by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit)
assert by_ov1 == {**data.snapshot, "next_branch": None}
- visit2 = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- origin_visit2 = swh_storage.origin_visit_add([visit2])[0]
- visit2_id = origin_visit2.visit
+ ov2 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )[0]
swh_storage.snapshot_add([data.snapshot])
date_now4 = now()
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit2.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
- date=date_now4,
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=date_now4,
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
- by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit2_id)
+ by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov2.visit)
assert by_ov2 == {**data.snapshot, "next_branch": None}
data1 = {
"origin": origin_url,
"date": data.date_visit1,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"status": "ongoing",
"metadata": None,
"snapshot": None,
}
data2 = {
"origin": origin_url,
- "date": data.date_visit1,
- "visit": origin_visit1.visit,
+ "date": date_now2,
+ "visit": ov1.visit,
"status": "ongoing",
"metadata": None,
"snapshot": data.snapshot["id"],
@@ -3022,15 +2877,15 @@
data3 = {
"origin": origin_url,
"date": data.date_visit2,
- "visit": origin_visit2.visit,
+ "visit": ov2.visit,
"status": "ongoing",
"metadata": None,
"snapshot": None,
}
data4 = {
"origin": origin_url,
- "date": data.date_visit2,
- "visit": origin_visit2.visit,
+ "date": date_now4,
+ "visit": ov2.visit,
"status": "ongoing",
"metadata": None,
"snapshot": data.snapshot["id"],
@@ -3044,27 +2899,13 @@
),
("origin_visit_status", OriginVisitStatus.from_dict(data1)),
("snapshot", Snapshot.from_dict(data.snapshot)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data2, "type": data.type_visit1}),
- ),
- (
- "origin_visit_status",
- OriginVisitStatus.from_dict({**data2, "date": date_now2}),
- ),
+ ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
(
"origin_visit",
OriginVisit.from_dict({**data3, "type": data.type_visit2}),
),
("origin_visit_status", OriginVisitStatus.from_dict(data3)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data4, "type": data.type_visit2}),
- ),
- (
- "origin_visit_status",
- OriginVisitStatus.from_dict({**data4, "date": date_now4}),
- ),
+ ("origin_visit_status", OriginVisitStatus.from_dict(data4),),
]
def test_snapshot_get_latest(self, swh_storage):
@@ -3097,11 +2938,16 @@
# Add snapshot to visit1, latest snapshot = visit 1 snapshot
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**data.complete_snapshot,
@@ -3116,7 +2962,17 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
+ )
assert {
**data.complete_snapshot,
"next_branch": None,
@@ -3124,8 +2980,16 @@
# Add snapshot to visit2 and check that the new snapshot is returned
swh_storage.snapshot_add([data.empty_snapshot])
- swh_storage.origin_visit_update(
- origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.empty_snapshot["id"],
+ )
+ ]
)
assert {
**data.empty_snapshot,
@@ -3141,11 +3005,16 @@
# Add snapshot to visit3 (same date as visit2) and check that
# the new snapshot is returned
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov3.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov3.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**data.complete_snapshot,
@@ -3176,11 +3045,16 @@
# Add unknown snapshot to visit1, check that the inconsistency is
# detected
- swh_storage.origin_visit_update(
- origin_url,
- ov1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
@@ -3194,7 +3068,18 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
+ )
+
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"]),
@@ -3208,8 +3093,16 @@
# Add unknown snapshot to visit2 and check that the inconsistency
# is detected
- swh_storage.origin_visit_update(
- origin_url, ov2.visit, status="ongoing", snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
@@ -3280,11 +3173,16 @@
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
swh_storage.directory_add([data.dir])
swh_storage.revision_add([data.revision])
@@ -4023,8 +3921,16 @@
origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
)
visit = swh_storage.origin_visit_add([visit])[0]
- swh_storage.origin_visit_update(
- origin_url, visit.visit, status="ongoing", snapshot=data.snapshot["id"]
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=visit.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
assert swh_storage.origin_count("github", with_visit=False) == 3
diff --git a/swh/storage/writer.py b/swh/storage/writer.py
--- a/swh/storage/writer.py
+++ b/swh/storage/writer.py
@@ -79,9 +79,6 @@
def origin_visit_add(self, visits: Iterable[OriginVisit]) -> None:
self.write_additions("origin_visit", visits)
- def origin_visit_update(self, visits: Iterable[OriginVisit]) -> None:
- self.write_additions("origin_visit", visits)
-
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None:
self.write_additions("origin_visit", visits)

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 6:50 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216418

Event Timeline