D3276: storage*: Remove origin-visit-update endpoint

D3276.diff
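For context before the patch itself: every caller of the removed origin_visit_update endpoint is rewritten to append an OriginVisitStatus row via origin_visit_status_add, as the updated tests below show. A minimal migration sketch, assuming an already-initialized storage instance; the mark_visit_full helper name and the swh.model.model import path are assumptions for illustration, while the OriginVisitStatus fields and the origin_visit_status_add call mirror the diff:

    import datetime

    from swh.model.model import OriginVisitStatus  # assumed import path

    def mark_visit_full(storage, origin_url: str, visit_id: int, snapshot_id=None):
        # Before this change:
        #   storage.origin_visit_update(origin_url, visit_id,
        #                               status="full", snapshot=snapshot_id)
        # After this change: record a new visit status instead of mutating the visit.
        storage.origin_visit_status_add(
            [
                OriginVisitStatus(
                    origin=origin_url,
                    visit=visit_id,
                    date=datetime.datetime.now(tz=datetime.timezone.utc),
                    status="full",
                    snapshot=snapshot_id,
                )
            ]
        )

The patch follows.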
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -838,49 +838,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add(visit_status)
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- ):
- origin_url = origin # TODO: rename the argument
-
- # Get the existing data of the visit
- visit_ = self.origin_visit_get_by(origin_url, visit_id)
- if not visit_:
- raise StorageArgumentException("This origin visit does not exist.")
- with convert_validation_exceptions():
- visit = OriginVisit.from_dict(visit_)
-
- updates: Dict[str, Any] = {"status": status}
- if metadata and metadata != visit.metadata:
- updates["metadata"] = metadata
- if snapshot and snapshot != visit.snapshot:
- updates["snapshot"] = snapshot
-
- with convert_validation_exceptions():
- visit = attr.evolve(visit, **updates)
-
- self.journal_writer.origin_visit_update([visit])
-
- last_visit_update = self._origin_visit_get_updated(visit.origin, visit.visit)
- assert last_visit_update is not None
-
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date or now(),
- status=status,
- snapshot=snapshot or last_visit_update["snapshot"],
- metadata=metadata or last_visit_update["metadata"],
- )
- self._origin_visit_status_add(visit_status)
-
def _origin_visit_merge(
self, visit: Dict[str, Any], visit_status: Dict[str, Any]
) -> Dict[str, Any]:
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -477,31 +477,6 @@
+ [jsonize(visit_status.metadata)],
)
- def origin_visit_update(self, origin_id, visit_id, updates, cur=None):
- """Update origin_visit's status."""
- cur = self._cursor(cur)
- update_cols = []
- values = []
- where = ["origin.id = origin_visit.origin", "origin.url=%s", "visit=%s"]
- where_values = [origin_id, visit_id]
- if "status" in updates:
- update_cols.append("status=%s")
- values.append(updates.pop("status"))
- if "metadata" in updates:
- update_cols.append("metadata=%s")
- values.append(jsonize(updates.pop("metadata")))
- if "snapshot" in updates:
- update_cols.append("snapshot=%s")
- values.append(updates.pop("snapshot"))
- assert not updates, "Unknown fields: %r" % updates
- query = """UPDATE origin_visit
- SET {update_cols}
- FROM origin
- WHERE {where}""".format(
- **{"update_cols": ", ".join(update_cols), "where": " AND ".join(where)}
- )
- cur.execute(query, (*values, *where_values))
-
def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None:
# doing an extra query like this is way simpler than trying to join
# the origin id in the query below
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -853,56 +853,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add_one(visit_status)
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- ):
- origin_url = self._get_origin_url(origin)
- if origin_url is None:
- raise StorageArgumentException("Unknown origin.")
-
- try:
- visit = self._origin_visits[origin_url][visit_id - 1]
- except IndexError:
- raise StorageArgumentException("Unknown visit_id for this origin") from None
-
- updates: Dict[str, Any] = {
- "status": status,
- }
- if metadata and metadata != visit.metadata:
- updates["metadata"] = metadata
- if snapshot and snapshot != visit.snapshot:
- updates["snapshot"] = snapshot
-
- if updates:
- with convert_validation_exceptions():
- updated_visit = OriginVisit.from_dict({**visit.to_dict(), **updates})
- self.journal_writer.origin_visit_update([updated_visit])
-
- self._origin_visits[origin_url][visit_id - 1] = updated_visit
-
- # Retrieve the previous visit status
- assert visit.visit is not None
-
- last_visit_status = self._origin_visit_get_updated(origin, visit_id)
- assert last_visit_status is not None
-
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date or now(),
- status=status,
- snapshot=snapshot or last_visit_status.snapshot,
- metadata=metadata or last_visit_status.metadata,
- )
- self._origin_visit_status_add_one(visit_status)
-
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None:
for visit in visits:
if visit.visit is None:
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -810,33 +810,6 @@
"""
...
- @remote_api_endpoint("origin/visit/update")
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- ):
- """Update an origin_visit's status.
-
- Args:
- origin (str): visited origin's URL
- visit_id: Visit's id
- status: Visit's new status
- metadata: Data associated to the visit
- snapshot (sha1_git): identifier of the snapshot to add to
- the visit
- date: Update date
-
- Returns:
- None
-
- """
- ...
-
@remote_api_endpoint("origin/visit/upsert")
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None:
"""Add a origin_visits with a specific id and with all its data.
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -878,64 +878,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add(visit_status, db, cur)
- @timed
- @db_transaction()
- def origin_visit_update(
- self,
- origin: str,
- visit_id: int,
- status: str,
- metadata: Optional[Dict] = None,
- snapshot: Optional[bytes] = None,
- date: Optional[datetime.datetime] = None,
- db=None,
- cur=None,
- ):
- if not isinstance(origin, str):
- raise StorageArgumentException(
- "origin must be a string, not %r" % (origin,)
- )
- origin_url = origin
- visit = db.origin_visit_get(origin_url, visit_id, cur=cur)
-
- if not visit:
- raise StorageArgumentException("Invalid visit_id for this origin.")
-
- visit = dict(zip(db.origin_visit_get_cols, visit))
-
- updates: Dict[str, Any] = {
- "status": status,
- }
- if metadata and metadata != visit["metadata"]:
- updates["metadata"] = metadata
- if snapshot and snapshot != visit["snapshot"]:
- updates["snapshot"] = snapshot
-
- if updates:
- with convert_validation_exceptions():
- updated_visit = OriginVisit.from_dict({**visit, **updates})
- self.journal_writer.origin_visit_update([updated_visit])
-
- # Write updates to origin visit (backward compatibility)
- db.origin_visit_update(origin, visit_id, updates)
-
- # Add new origin visit status
- last_visit_status = self._origin_visit_get_updated(
- origin, visit_id, db=db, cur=cur
- )
- assert last_visit_status is not None
-
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date or now(),
- status=status,
- snapshot=snapshot or last_visit_status["snapshot"],
- metadata=metadata or last_visit_status["metadata"],
- )
- self._origin_visit_status_add(visit_status, db=db, cur=cur)
-
def _origin_visit_get_updated(
self, origin: str, visit_id: int, db, cur
) -> Optional[Dict[str, Any]]:
diff --git a/swh/storage/tests/test_kafka_writer.py b/swh/storage/tests/test_kafka_writer.py
--- a/swh/storage/tests/test_kafka_writer.py
+++ b/swh/storage/tests/test_kafka_writer.py
@@ -53,14 +53,8 @@
for obj in objs:
assert isinstance(obj, OriginVisit)
storage.origin_add_one(Origin(url=obj.origin))
- visit = method([obj])[0]
+ method([obj])
expected_messages += 1 + 1 # 1 visit + 1 visit status
-
- obj_d = obj.to_dict()
- for k in ("visit", "origin", "date", "type"):
- del obj_d[k]
- storage.origin_visit_update(obj.origin, visit.visit, **obj_d)
- expected_messages += 1 + 1 # 1 visit update + 1 visit status
else:
assert False, obj_type
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -679,87 +679,6 @@
assert mock_memory.call_count == 1
-def test_retrying_proxy_swh_storage_origin_visit_update(swh_storage, sample_data):
- """Standard origin_visit_update works as before
-
- """
- sample_origin = sample_data["origin"][0]
- origin_url = swh_storage.origin_add_one(sample_origin)
- visit = OriginVisit(
- origin=origin_url, date=date_visit1, type="hg", status="ongoing", snapshot=None
- )
-
- origin_visit = swh_storage.origin_visit_add([visit])[0]
-
- ov = next(swh_storage.origin_visit_get(origin_url))
- assert ov["origin"] == origin_url
- assert ov["visit"] == origin_visit.visit
- assert ov["status"] == "ongoing"
- assert ov["snapshot"] is None
- assert ov["metadata"] is None
-
- swh_storage.origin_visit_update(origin_url, origin_visit.visit, status="full")
-
- ov = next(swh_storage.origin_visit_get(origin_url))
- assert ov["origin"] == origin_url
- assert ov["visit"] == origin_visit.visit
- assert ov["status"] == "full"
- assert ov["snapshot"] is None
- assert ov["metadata"] is None
-
-
-def test_retrying_proxy_swh_storage_origin_visit_update_retry(
- monkeypatch_sleep, swh_storage, sample_data, mocker, fake_hash_collision
-):
- """Multiple retries for hash collision and psycopg2 error but finally ok
-
- """
- sample_origin = sample_data["origin"][1]
- origin_url = sample_origin["url"]
-
- mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_visit_update"
- )
- mock_memory.side_effect = [
- # first try goes ko
- fake_hash_collision,
- # second try goes ko
- psycopg2.IntegrityError("origin already inserted"),
- # ok then!
- {"origin": origin_url, "visit": 1},
- ]
-
- visit_id = 1
- swh_storage.origin_visit_update(origin_url, visit_id, status="full")
-
- mock_memory.assert_has_calls(
- [
- call(origin_url, visit_id, metadata=None, snapshot=None, status="full"),
- call(origin_url, visit_id, metadata=None, snapshot=None, status="full"),
- call(origin_url, visit_id, metadata=None, snapshot=None, status="full"),
- ]
- )
-
-
-def test_retrying_proxy_swh_storage_origin_visit_update_failure(
- swh_storage, sample_data, mocker
-):
- """Unfiltered errors are raising without retry
-
- """
- mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_visit_update"
- )
- mock_memory.side_effect = StorageArgumentException("Refuse to add origin always!")
- origin_url = sample_data["origin"][0]["url"]
- visit_id = 9
-
- with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.origin_visit_update(origin_url, visit_id, "partial")
-
- assert mock_memory.call_count == 1
-
-
def test_retrying_proxy_storage_directory_add(swh_storage, sample_data):
"""Standard directory_add works as before
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1427,8 +1427,16 @@
)
]
)[0]
- swh_storage.origin_visit_update(
- origin_url, visit_id=visit.visit, status="full"
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=visit.visit,
+ date=now(),
+ status="full",
+ snapshot=None,
+ )
+ ]
)
swh_storage.refresh_stat_counters()
@@ -1463,7 +1471,17 @@
)
]
)[0]
- swh_storage.origin_visit_update(origin_url, visit.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=visit.visit,
+ date=now(),
+ status="full",
+ snapshot=None,
+ )
+ ]
+ )
random_origin_visit = swh_storage.origin_visit_get_random(visit_type)
assert random_origin_visit is None
@@ -1842,194 +1860,6 @@
for obj in expected_objects:
assert obj in actual_objects
- def test_origin_visit_update(self, swh_storage):
- # given
- origin_url = swh_storage.origin_add_one(data.origin)
- origin_url2 = swh_storage.origin_add_one(data.origin2)
- # date_visit < date_visit2
- date_visit = data.date_visit1
- date_visit2 = data.date_visit2
-
- # Round to milliseconds before insertion, so equality doesn't fail
- # after a round-trip through a DB (eg. Cassandra)
- date_visit = date_visit.replace(microsecond=round(date_visit.microsecond, -3))
- date_visit2 = date_visit2.replace(
- microsecond=round(date_visit2.microsecond, -3)
- )
-
- visit1 = OriginVisit(
- origin=origin_url,
- date=date_visit,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- visit2 = OriginVisit(
- origin=origin_url,
- date=date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- visit3 = OriginVisit(
- origin=origin_url2,
- date=date_visit2,
- type=data.type_visit3, # noqa
- status="ongoing",
- snapshot=None,
- )
- ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
-
- # when
- visit1_metadata = {
- "contents": 42,
- "directories": 22,
- }
- swh_storage.origin_visit_update(
- origin_url, ov1.visit, status="full", metadata=visit1_metadata
- )
- swh_storage.origin_visit_update(origin_url2, ov3.visit, status="partial")
-
- # then
- actual_origin_visits = list(swh_storage.origin_visit_get(origin_url))
- expected_visits = [
- {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "status": "full",
- "metadata": visit1_metadata,
- "snapshot": None,
- },
- {
- "origin": origin_url,
- "date": date_visit2,
- "visit": ov2.visit,
- "type": data.type_visit2,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- },
- ]
- for visit in expected_visits:
- assert visit in actual_origin_visits
-
- actual_origin_visits_bis = list(
- swh_storage.origin_visit_get(origin_url, limit=1)
- )
- assert actual_origin_visits_bis == [
- {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "status": "full",
- "metadata": visit1_metadata,
- "snapshot": None,
- }
- ]
-
- actual_origin_visits_ter = list(
- swh_storage.origin_visit_get(origin_url, last_visit=ov1.visit)
- )
- assert actual_origin_visits_ter == [
- {
- "origin": origin_url,
- "date": date_visit2,
- "visit": ov2.visit,
- "type": data.type_visit2,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- ]
-
- actual_origin_visits2 = list(swh_storage.origin_visit_get(origin_url2))
- assert actual_origin_visits2 == [
- {
- "origin": origin_url2,
- "date": date_visit2,
- "visit": ov3.visit,
- "type": data.type_visit3,
- "status": "partial",
- "metadata": None,
- "snapshot": None,
- }
- ]
-
- data1 = {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data2 = {
- "origin": origin_url,
- "date": date_visit2,
- "visit": ov2.visit,
- "type": data.type_visit2,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data3 = {
- "origin": origin_url2,
- "date": date_visit2,
- "visit": ov3.visit,
- "type": data.type_visit3,
- "status": "ongoing",
- "metadata": None,
- "snapshot": None,
- }
- data4 = {
- "origin": origin_url,
- "date": date_visit,
- "visit": ov1.visit,
- "type": data.type_visit1,
- "metadata": visit1_metadata,
- "status": "full",
- "snapshot": None,
- }
- data5 = {
- "origin": origin_url2,
- "date": date_visit2,
- "visit": ov3.visit,
- "type": data.type_visit3,
- "status": "partial",
- "metadata": None,
- "snapshot": None,
- }
- objects = list(swh_storage.journal_writer.journal.objects)
- assert ("origin", Origin.from_dict(data.origin)) in objects
- assert ("origin", Origin.from_dict(data.origin2)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data1)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data2)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data3)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data4)) in objects
- assert ("origin_visit", OriginVisit.from_dict(data5)) in objects
-
- def test_origin_visit_update_validation(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- visit = swh_storage.origin_visit_add([visit])[0]
- with pytest.raises(
- (StorageArgumentException, ValueError), match="status"
- ) as cm:
- swh_storage.origin_visit_update(origin_url, visit.visit, status="foobar")
-
- if type(cm.value) == psycopg2.DataError:
- assert cm.value.pgcode == psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION
-
def test_origin_visit_find_by_date(self, swh_storage):
# given
origin_url = swh_storage.origin_add_one(data.origin)
@@ -2068,36 +1898,6 @@
def test_origin_visit_find_by_date__unknown_origin(self, swh_storage):
swh_storage.origin_visit_find_by_date("foo", data.date_visit2)
- def test_origin_visit_update_missing_snapshot(self, swh_storage):
- # given
- origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- origin_visit = swh_storage.origin_visit_add([visit])[0]
-
- # when
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
- ) # snapshot does not exist yet
-
- # then
- actual_origin_visit = swh_storage.origin_visit_get_by(
- origin_url, origin_visit.visit
- )
- assert actual_origin_visit["snapshot"] == data.snapshot["id"]
-
- # when
- swh_storage.snapshot_add([data.snapshot])
- assert actual_origin_visit["snapshot"] == data.snapshot["id"]
-
def test_origin_visit_get_by(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
origin_url2 = swh_storage.origin_add_one(data.origin2)
@@ -2111,11 +1911,16 @@
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
# Add some other {origin, visit} entries
@@ -2141,8 +1946,17 @@
"directories": 22,
}
- swh_storage.origin_visit_update(
- origin_url, origin_visit1.visit, status="full", metadata=visit1_metadata
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.snapshot["id"],
+ metadata=visit1_metadata,
+ )
+ ]
)
expected_origin_visit = origin_visit1.to_dict()
@@ -2458,11 +2272,16 @@
# Add snapshot to visit1; require_snapshot=True makes it return
# visit1 and require_snapshot=False still returns visit2
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**origin_visit1,
@@ -2479,7 +2298,18 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
+ )
+
assert {
**origin_visit1,
"snapshot": data.complete_snapshot["id"],
@@ -2490,8 +2320,17 @@
# Add snapshot to visit2 and check that the new snapshot is returned
swh_storage.snapshot_add([data.empty_snapshot])
- swh_storage.origin_visit_update(
- origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"]
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.empty_snapshot["id"],
+ )
+ ]
)
assert {
**origin_visit2,
@@ -2509,11 +2348,17 @@
# Add snapshot to visit3 (same date as visit2)
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov3.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov3.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**origin_visit1,
@@ -2559,48 +2404,54 @@
def test_snapshot_add_get_empty(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- origin_visit1 = swh_storage.origin_visit_add([visit])[0]
- visit_id = origin_visit1.visit
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )[0]
actual_result = swh_storage.snapshot_add([data.empty_snapshot])
assert actual_result == {"snapshot:add": 1}
date_now = now()
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.empty_snapshot["id"],
- date=date_now,
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=date_now,
+ status="full",
+ snapshot=data.empty_snapshot["id"],
+ )
+ ]
)
by_id = swh_storage.snapshot_get(data.empty_snapshot["id"])
assert by_id == {**data.empty_snapshot, "next_branch": None}
- by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id)
+ by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit)
assert by_ov == {**data.empty_snapshot, "next_branch": None}
data1 = {
"origin": origin_url,
"date": data.date_visit1,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"status": "ongoing",
"metadata": None,
"snapshot": None,
}
data2 = {
"origin": origin_url,
- "date": data.date_visit1,
- "visit": origin_visit1.visit,
- "status": "ongoing",
+ "date": date_now,
+ "visit": ov1.visit,
+ "status": "full",
"metadata": None,
"snapshot": data.empty_snapshot["id"],
}
@@ -2613,14 +2464,7 @@
),
("origin_visit_status", OriginVisitStatus.from_dict(data1)),
("snapshot", Snapshot.from_dict(data.empty_snapshot)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data2, "type": data.type_visit1,}),
- ),
- (
- "origin_visit_status",
- OriginVisitStatus.from_dict({**data2, "date": date_now}),
- ),
+ ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
]
def test_snapshot_add_get_complete(self, swh_storage):
@@ -2637,11 +2481,16 @@
visit_id = origin_visit1.visit
actual_result = swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert actual_result == {"snapshot:add": 1}
@@ -2803,11 +2652,16 @@
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
snp_id = data.complete_snapshot["id"]
@@ -2923,11 +2777,16 @@
visit_id = origin_visit1.visit
swh_storage.snapshot_add([data.snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
by_id = swh_storage.snapshot_get(data.snapshot["id"])
@@ -2939,82 +2798,78 @@
origin_visit_info = swh_storage.origin_visit_get_by(origin_url, visit_id)
assert origin_visit_info["snapshot"] == data.snapshot["id"]
- def test_snapshot_add_nonexistent_visit(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin)
- # unknown visit
- visit_id = 54164461156
- swh_storage.journal_writer.journal.objects[:] = []
-
- swh_storage.snapshot_add([data.snapshot])
-
- with pytest.raises(StorageArgumentException):
- swh_storage.origin_visit_update(
- origin_url, visit_id, status="ongoing", snapshot=data.snapshot["id"]
- )
-
- assert list(swh_storage.journal_writer.journal.objects) == [
- ("snapshot", Snapshot.from_dict(data.snapshot))
- ]
-
def test_snapshot_add_twice__by_origin_visit(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- visit = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- origin_visit1 = swh_storage.origin_visit_add([visit])[0]
- visit1_id = origin_visit1.visit
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )[0]
swh_storage.snapshot_add([data.snapshot])
date_now2 = now()
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
- date=date_now2,
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=date_now2,
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
- by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit1_id)
+ by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit)
assert by_ov1 == {**data.snapshot, "next_branch": None}
- visit2 = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- origin_visit2 = swh_storage.origin_visit_add([visit2])[0]
- visit2_id = origin_visit2.visit
+ ov2 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )[0]
swh_storage.snapshot_add([data.snapshot])
date_now4 = now()
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit2.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
- date=date_now4,
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=date_now4,
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
- by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit2_id)
+ by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov2.visit)
assert by_ov2 == {**data.snapshot, "next_branch": None}
data1 = {
"origin": origin_url,
"date": data.date_visit1,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"status": "ongoing",
"metadata": None,
"snapshot": None,
}
data2 = {
"origin": origin_url,
- "date": data.date_visit1,
- "visit": origin_visit1.visit,
+ "date": date_now2,
+ "visit": ov1.visit,
"status": "ongoing",
"metadata": None,
"snapshot": data.snapshot["id"],
@@ -3022,15 +2877,15 @@
data3 = {
"origin": origin_url,
"date": data.date_visit2,
- "visit": origin_visit2.visit,
+ "visit": ov2.visit,
"status": "ongoing",
"metadata": None,
"snapshot": None,
}
data4 = {
"origin": origin_url,
- "date": data.date_visit2,
- "visit": origin_visit2.visit,
+ "date": date_now4,
+ "visit": ov2.visit,
"status": "ongoing",
"metadata": None,
"snapshot": data.snapshot["id"],
@@ -3044,27 +2899,13 @@
),
("origin_visit_status", OriginVisitStatus.from_dict(data1)),
("snapshot", Snapshot.from_dict(data.snapshot)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data2, "type": data.type_visit1}),
- ),
- (
- "origin_visit_status",
- OriginVisitStatus.from_dict({**data2, "date": date_now2}),
- ),
+ ("origin_visit_status", OriginVisitStatus.from_dict(data2),),
(
"origin_visit",
OriginVisit.from_dict({**data3, "type": data.type_visit2}),
),
("origin_visit_status", OriginVisitStatus.from_dict(data3)),
- (
- "origin_visit",
- OriginVisit.from_dict({**data4, "type": data.type_visit2}),
- ),
- (
- "origin_visit_status",
- OriginVisitStatus.from_dict({**data4, "date": date_now4}),
- ),
+ ("origin_visit_status", OriginVisitStatus.from_dict(data4),),
]
def test_snapshot_get_latest(self, swh_storage):
@@ -3097,11 +2938,16 @@
# Add snapshot to visit1, latest snapshot = visit 1 snapshot
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**data.complete_snapshot,
@@ -3116,7 +2962,17 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
+ )
assert {
**data.complete_snapshot,
"next_branch": None,
@@ -3124,8 +2980,16 @@
# Add snapshot to visit2 and check that the new snapshot is returned
swh_storage.snapshot_add([data.empty_snapshot])
- swh_storage.origin_visit_update(
- origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.empty_snapshot["id"],
+ )
+ ]
)
assert {
**data.empty_snapshot,
@@ -3141,11 +3005,16 @@
# Add snapshot to visit3 (same date as visit2) and check that
# the new snapshot is returned
swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- ov3.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov3.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
assert {
**data.complete_snapshot,
@@ -3176,11 +3045,16 @@
# Add unknown snapshot to visit1, check that the inconsistency is
# detected
- swh_storage.origin_visit_update(
- origin_url,
- ov1.visit,
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
)
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
@@ -3194,7 +3068,18 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov1.visit,
+ date=now(),
+ status="full",
+ snapshot=data.complete_snapshot["id"],
+ )
+ ]
+ )
+
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"]),
@@ -3208,8 +3093,16 @@
# Add unknown snapshot to visit2 and check that the inconsistency
# is detected
- swh_storage.origin_visit_update(
- origin_url, ov2.visit, status="ongoing", snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=ov2.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
@@ -3280,11 +3173,16 @@
origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.snapshot])
- swh_storage.origin_visit_update(
- origin_url,
- origin_visit1.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=origin_visit1.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
swh_storage.directory_add([data.dir])
swh_storage.revision_add([data.revision])
@@ -4023,8 +3921,16 @@
origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
)
visit = swh_storage.origin_visit_add([visit])[0]
- swh_storage.origin_visit_update(
- origin_url, visit.visit, status="ongoing", snapshot=data.snapshot["id"]
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin_url,
+ visit=visit.visit,
+ date=now(),
+ status="ongoing",
+ snapshot=data.snapshot["id"],
+ )
+ ]
)
assert swh_storage.origin_count("github", with_visit=False) == 3
diff --git a/swh/storage/writer.py b/swh/storage/writer.py
--- a/swh/storage/writer.py
+++ b/swh/storage/writer.py
@@ -79,9 +79,6 @@
def origin_visit_add(self, visits: Iterable[OriginVisit]) -> None:
self.write_additions("origin_visit", visits)
- def origin_visit_update(self, visits: Iterable[OriginVisit]) -> None:
- self.write_additions("origin_visit", visits)
-
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None:
self.write_additions("origin_visit", visits)