Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122809
D3380.id12003.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D3380.id12003.diff
View Options
diff --git a/swh/storage/algos/origin.py b/swh/storage/algos/origin.py
--- a/swh/storage/algos/origin.py
+++ b/swh/storage/algos/origin.py
@@ -92,4 +92,8 @@
require_snapshot=require_snapshot,
)
if visit_status is not None:
+ # the storage API returns extra fields that no longer belong to an
+ # origin-visit (metadata, status, snapshot), so we drop those keys
+ for key in ["metadata", "status", "snapshot"]:
+ visit.pop(key, None)
return (OriginVisit.from_dict(visit), visit_status)
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -843,26 +843,6 @@
for visit_status in visit_statuses:
self._origin_visit_status_add(visit_status)
- def _origin_visit_merge(
- self, visit: Dict[str, Any], visit_status: OriginVisitStatus,
- ) -> Dict[str, Any]:
- """Merge origin_visit and visit_status together.
-
- """
- return OriginVisit.from_dict(
- {
- # default to the values in visit
- **visit,
- # override with the last update
- **visit_status.to_dict(),
- # visit['origin'] is the URL (via a join), while
- # visit_status['origin'] is only an id.
- "origin": visit["origin"],
- # but keep the date of the creation of the origin visit
- "date": visit["date"],
- }
- ).to_dict()
-
def _origin_visit_apply_last_status(self, visit: Dict[str, Any]) -> Dict[str, Any]:
"""Retrieve the latest visit status information for the origin visit.
Then merge it with the visit and return it.
@@ -872,7 +852,18 @@
visit["origin"], visit["visit"]
)
assert row is not None
- return self._origin_visit_merge(visit, row_to_visit_status(row))
+ visit_status = row_to_visit_status(row)
+ return {
+ # default to the values in visit
+ **visit,
+ # override with the last update
+ **visit_status.to_dict(),
+ # visit['origin'] is the URL (via a join), while
+ # visit_status['origin'] is only an id.
+ "origin": visit["origin"],
+ # but keep the date of the creation of the origin visit
+ "date": visit["date"],
+ }
def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]:
"""Retrieve origin visit and latest origin visit status and merge them
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -581,7 +581,7 @@
return None
visit = self._origin_visit_get_updated(origin_url, visit)
- snapshot_id = visit.snapshot
+ snapshot_id = visit["snapshot"]
if snapshot_id:
return self.snapshot_get(snapshot_id)
else:
@@ -739,7 +739,8 @@
for ov in self._origin_visits[orig["url"]]
)
for ov in visits:
- if ov.snapshot and ov.snapshot in self._snapshots:
+ snapshot = ov["snapshot"]
+ if snapshot and snapshot in self._snapshots:
filtered_origins.append(orig)
break
else:
@@ -845,7 +846,7 @@
for visit_status in visit_statuses:
self._origin_visit_status_add_one(visit_status)
- def _origin_visit_get_updated(self, origin: str, visit_id: int) -> OriginVisit:
+ def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]:
"""Merge origin visit and latest origin visit status
"""
@@ -855,16 +856,14 @@
visit_key = (origin, visit_id)
visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date)
- return OriginVisit.from_dict(
- {
- # default to the values in visit
- **visit.to_dict(),
- # override with the last update
- **visit_update.to_dict(),
- # but keep the date of the creation of the origin visit
- "date": visit.date,
- }
- )
+ return {
+ # default to the values in visit
+ **visit.to_dict(),
+ # override with the last update
+ **visit_update.to_dict(),
+ # but keep the date of the creation of the origin visit
+ "date": visit.date,
+ }
def origin_visit_get(
self,
@@ -893,7 +892,7 @@
visit_update = self._origin_visit_get_updated(origin_url, visit_id)
assert visit_update is not None
- yield visit_update.to_dict()
+ yield visit_update
def origin_visit_find_by_date(
self, origin: str, visit_date: datetime.datetime
@@ -904,7 +903,7 @@
visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit))
visit_update = self._origin_visit_get_updated(origin, visit.visit)
assert visit_update is not None
- return visit_update.to_dict()
+ return visit_update
return None
def origin_visit_get_by(self, origin: str, visit: int) -> Optional[Dict[str, Any]]:
@@ -914,7 +913,7 @@
):
visit_update = self._origin_visit_get_updated(origin_url, visit)
assert visit_update is not None
- return visit_update.to_dict()
+ return visit_update
return None
def origin_visit_get_latest(
@@ -936,16 +935,16 @@
]
if type is not None:
- visits = [visit for visit in visits if visit.type == type]
+ visits = [visit for visit in visits if visit["type"] == type]
if allowed_statuses is not None:
- visits = [visit for visit in visits if visit.status in allowed_statuses]
+ visits = [visit for visit in visits if visit["status"] in allowed_statuses]
if require_snapshot:
- visits = [visit for visit in visits if visit.snapshot]
+ visits = [visit for visit in visits if visit["snapshot"]]
- visit = max(visits, key=lambda v: (v.date, v.visit), default=None)
+ visit = max(visits, key=lambda v: (v["date"], v["visit"]), default=None)
if visit is None:
return None
- return visit.to_dict()
+ return visit
def origin_visit_status_get_latest(
self,
@@ -987,8 +986,11 @@
for visit in random_origin_visits:
updated_visit = self._origin_visit_get_updated(url, visit.visit)
assert updated_visit is not None
- if updated_visit.date > back_in_the_day and updated_visit.status == "full":
- return updated_visit.to_dict()
+ if (
+ updated_visit["date"] > back_in_the_day
+ and updated_visit["status"] == "full"
+ ):
+ return updated_visit
else:
return None
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -881,19 +881,6 @@
return None
return OriginVisitStatus.from_dict(row)
- def _origin_visit_get_updated(
- self, origin: str, visit_id: int, db, cur
- ) -> Optional[Dict[str, Any]]:
- """Retrieve origin visit and latest origin visit status and merge them
- into an origin visit.
-
- """
- row_visit = db.origin_visit_get(origin, visit_id)
- if row_visit is None:
- return None
- visit = dict(zip(db.origin_visit_get_cols, row_visit))
- return self._origin_visit_apply_update(visit, db=db, cur=cur)
-
def _origin_visit_apply_update(
self, visit: Dict[str, Any], db, cur=None
) -> Dict[str, Any]:
@@ -904,27 +891,17 @@
visit_status = db.origin_visit_status_get_latest(
visit["origin"], visit["visit"], cur=cur
)
- return self._origin_visit_merge(visit, visit_status)
-
- def _origin_visit_merge(
- self, visit: Dict[str, Any], visit_status: Dict[str, Any]
- ) -> Dict[str, Any]:
- """Merge origin_visit and origin_visit_status together.
-
- """
- return OriginVisit.from_dict(
- {
- # default to the values in visit
- **visit,
- # override with the last update
- **visit_status,
- # visit['origin'] is the URL (via a join), while
- # visit_status['origin'] is only an id.
- "origin": visit["origin"],
- # but keep the date of the creation of the origin visit
- "date": visit["date"],
- }
- ).to_dict()
+ return {
+ # default to the values in visit
+ **visit,
+ # override with the last update
+ **visit_status,
+ # visit['origin'] is the URL (via a join), while
+ # visit_status['origin'] is only an id.
+ "origin": visit["origin"],
+ # but keep the date of the creation of the origin visit
+ "date": visit["date"],
+ }
@timed
@db_transaction_generator(statement_timeout=500)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 2:36 AM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218349
Attached To
D3380: storage*: Drop intermediary conversion step into OriginVisit
Event Timeline
Log In to Comment