Page MenuHomeSoftware Heritage

D3380.id12003.diff
No OneTemporary

D3380.id12003.diff

diff --git a/swh/storage/algos/origin.py b/swh/storage/algos/origin.py
--- a/swh/storage/algos/origin.py
+++ b/swh/storage/algos/origin.py
@@ -92,4 +92,8 @@
require_snapshot=require_snapshot,
)
if visit_status is not None:
+ # the storage api returns extra fields which no longer map to an
+ # origin-visit, so we drop those keys
+ for key in ["metadata", "status", "snapshot"]:
+ visit.pop(key, None)
return (OriginVisit.from_dict(visit), visit_status)
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -843,26 +843,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add(visit_status)
- def _origin_visit_merge(
- self, visit: Dict[str, Any], visit_status: OriginVisitStatus,
- ) -> Dict[str, Any]:
- """Merge origin_visit and visit_status together.
-
- """
- return OriginVisit.from_dict(
- {
- # default to the values in visit
- **visit,
- # override with the last update
- **visit_status.to_dict(),
- # visit['origin'] is the URL (via a join), while
- # visit_status['origin'] is only an id.
- "origin": visit["origin"],
- # but keep the date of the creation of the origin visit
- "date": visit["date"],
- }
- ).to_dict()
-
def _origin_visit_apply_last_status(self, visit: Dict[str, Any]) -> Dict[str, Any]:
"""Retrieve the latest visit status information for the origin visit.
Then merge it with the visit and return it.
@@ -872,7 +852,18 @@
visit["origin"], visit["visit"]
)
assert row is not None
- return self._origin_visit_merge(visit, row_to_visit_status(row))
+ visit_status = row_to_visit_status(row)
+ return {
+ # default to the values in visit
+ **visit,
+ # override with the last update
+ **visit_status.to_dict(),
+ # visit['origin'] is the URL (via a join), while
+ # visit_status['origin'] is only an id.
+ "origin": visit["origin"],
+ # but keep the date of the creation of the origin visit
+ "date": visit["date"],
+ }
def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]:
"""Retrieve origin visit and latest origin visit status and merge them
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -581,7 +581,7 @@
return None
visit = self._origin_visit_get_updated(origin_url, visit)
- snapshot_id = visit.snapshot
+ snapshot_id = visit["snapshot"]
if snapshot_id:
return self.snapshot_get(snapshot_id)
else:
@@ -739,7 +739,8 @@
for ov in self._origin_visits[orig["url"]]
)
for ov in visits:
- if ov.snapshot and ov.snapshot in self._snapshots:
+ snapshot = ov["snapshot"]
+ if snapshot and snapshot in self._snapshots:
filtered_origins.append(orig)
break
else:
@@ -845,7 +846,7 @@
for visit_status in visit_statuses:
self._origin_visit_status_add_one(visit_status)
- def _origin_visit_get_updated(self, origin: str, visit_id: int) -> OriginVisit:
+ def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]:
"""Merge origin visit and latest origin visit status
"""
@@ -855,16 +856,14 @@
visit_key = (origin, visit_id)
visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date)
- return OriginVisit.from_dict(
- {
- # default to the values in visit
- **visit.to_dict(),
- # override with the last update
- **visit_update.to_dict(),
- # but keep the date of the creation of the origin visit
- "date": visit.date,
- }
- )
+ return {
+ # default to the values in visit
+ **visit.to_dict(),
+ # override with the last update
+ **visit_update.to_dict(),
+ # but keep the date of the creation of the origin visit
+ "date": visit.date,
+ }
def origin_visit_get(
self,
@@ -893,7 +892,7 @@
visit_update = self._origin_visit_get_updated(origin_url, visit_id)
assert visit_update is not None
- yield visit_update.to_dict()
+ yield visit_update
def origin_visit_find_by_date(
self, origin: str, visit_date: datetime.datetime
@@ -904,7 +903,7 @@
visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit))
visit_update = self._origin_visit_get_updated(origin, visit.visit)
assert visit_update is not None
- return visit_update.to_dict()
+ return visit_update
return None
def origin_visit_get_by(self, origin: str, visit: int) -> Optional[Dict[str, Any]]:
@@ -914,7 +913,7 @@
):
visit_update = self._origin_visit_get_updated(origin_url, visit)
assert visit_update is not None
- return visit_update.to_dict()
+ return visit_update
return None
def origin_visit_get_latest(
@@ -936,16 +935,16 @@
]
if type is not None:
- visits = [visit for visit in visits if visit.type == type]
+ visits = [visit for visit in visits if visit["type"] == type]
if allowed_statuses is not None:
- visits = [visit for visit in visits if visit.status in allowed_statuses]
+ visits = [visit for visit in visits if visit["status"] in allowed_statuses]
if require_snapshot:
- visits = [visit for visit in visits if visit.snapshot]
+ visits = [visit for visit in visits if visit["snapshot"]]
- visit = max(visits, key=lambda v: (v.date, v.visit), default=None)
+ visit = max(visits, key=lambda v: (v["date"], v["visit"]), default=None)
if visit is None:
return None
- return visit.to_dict()
+ return visit
def origin_visit_status_get_latest(
self,
@@ -987,8 +986,11 @@
for visit in random_origin_visits:
updated_visit = self._origin_visit_get_updated(url, visit.visit)
assert updated_visit is not None
- if updated_visit.date > back_in_the_day and updated_visit.status == "full":
- return updated_visit.to_dict()
+ if (
+ updated_visit["date"] > back_in_the_day
+ and updated_visit["status"] == "full"
+ ):
+ return updated_visit
else:
return None
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -881,19 +881,6 @@
return None
return OriginVisitStatus.from_dict(row)
- def _origin_visit_get_updated(
- self, origin: str, visit_id: int, db, cur
- ) -> Optional[Dict[str, Any]]:
- """Retrieve origin visit and latest origin visit status and merge them
- into an origin visit.
-
- """
- row_visit = db.origin_visit_get(origin, visit_id)
- if row_visit is None:
- return None
- visit = dict(zip(db.origin_visit_get_cols, row_visit))
- return self._origin_visit_apply_update(visit, db=db, cur=cur)
-
def _origin_visit_apply_update(
self, visit: Dict[str, Any], db, cur=None
) -> Dict[str, Any]:
@@ -904,27 +891,17 @@
visit_status = db.origin_visit_status_get_latest(
visit["origin"], visit["visit"], cur=cur
)
- return self._origin_visit_merge(visit, visit_status)
-
- def _origin_visit_merge(
- self, visit: Dict[str, Any], visit_status: Dict[str, Any]
- ) -> Dict[str, Any]:
- """Merge origin_visit and origin_visit_status together.
-
- """
- return OriginVisit.from_dict(
- {
- # default to the values in visit
- **visit,
- # override with the last update
- **visit_status,
- # visit['origin'] is the URL (via a join), while
- # visit_status['origin'] is only an id.
- "origin": visit["origin"],
- # but keep the date of the creation of the origin visit
- "date": visit["date"],
- }
- ).to_dict()
+ return {
+ # default to the values in visit
+ **visit,
+ # override with the last update
+ **visit_status,
+ # visit['origin'] is the URL (via a join), while
+ # visit_status['origin'] is only an id.
+ "origin": visit["origin"],
+ # but keep the date of the creation of the origin visit
+ "date": visit["date"],
+ }
@timed
@db_transaction_generator(statement_timeout=500)

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 2:36 AM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218349

Event Timeline