Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123292
D3314.id11745.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
D3314.id11745.diff
View Options
diff --git a/swh/storage/algos/snapshot.py b/swh/storage/algos/snapshot.py
--- a/swh/storage/algos/snapshot.py
+++ b/swh/storage/algos/snapshot.py
@@ -1,8 +1,14 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Iterable, Optional
+
+from swh.model.model import Snapshot
+
+from swh.storage.algos.origin import origin_get_latest_visit_status
+
def snapshot_get_all_branches(storage, snapshot_id):
"""Get all the branches for a given snapshot
@@ -28,3 +34,61 @@
next_branch = data.get("next_branch")
return ret
+
+
+def snapshot_get_latest(
+ storage,
+ origin: str,
+ allowed_statuses: Optional[Iterable[str]] = None,
+ branches_count: Optional[int] = None,
+) -> Optional[Snapshot]:
+ """Get the latest snapshot for the given origin, optionally only from visits that have
+ one of the given allowed_statuses.
+
+ The branches of the snapshot are iterated in the lexicographical
+ order of their names.
+
+ Args:
+ storage: Storage instance
+ origin: the origin's URL
+ allowed_statuses: list of visit statuses considered
+ to find the latest snapshot for the visit. For instance,
+ ``allowed_statuses=['full']`` will only consider visits that
+ have successfully run to completion.
+ branches_count: Optional limit on the number of branches to retrieve. When
+ None (the default), the snapshot is returned with all its branches. When set
+ to a positive integer, the snapshot is partial with only that number of branches.
+
+ Raises:
+ ValueError: if branches_count is truthy but not a positive integer
+
+ Returns:
+ The snapshot object if one is found matching the criteria or None.
+
+ """
+ visit_and_status = origin_get_latest_visit_status(
+ storage, origin, allowed_statuses=allowed_statuses, require_snapshot=True
+ )
+
+ if not visit_and_status:
+ return None
+
+ _, visit_status = visit_and_status
+ snapshot_id = visit_status.snapshot
+ if not snapshot_id:
+ return None
+
+ if branches_count: # partial snapshot
+ if not isinstance(branches_count, int) or branches_count <= 0:
+ raise ValueError(
+ "Parameter branches_count must be a positive integer. "
+ f"Current value is {branches_count}"
+ )
+ snapshot = storage.snapshot_get_branches(
+ snapshot_id, branches_count=branches_count
+ )
+ snapshot.pop("next_branch")
+ else:
+ snapshot = snapshot_get_all_branches(storage, snapshot_id)
+
+ return Snapshot.from_dict(snapshot)
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -583,17 +583,6 @@
return self.snapshot_get(visit["snapshot"])
- def snapshot_get_latest(self, origin, allowed_statuses=None):
- visit = self.origin_visit_get_latest(
- origin, allowed_statuses=allowed_statuses, require_snapshot=True
- )
-
- if visit:
- assert visit["snapshot"]
- if self._cql_runner.snapshot_missing([visit["snapshot"]]):
- raise StorageArgumentException("Visit references unknown snapshot")
- return self.snapshot_get_branches(visit["snapshot"])
-
def snapshot_count_branches(self, snapshot_id):
if self._cql_runner.snapshot_missing([snapshot_id]):
# Makes sure we don't fetch branches for a snapshot that is
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -583,22 +583,6 @@
else:
return None
- def snapshot_get_latest(self, origin, allowed_statuses=None):
- origin_url = self._get_origin_url(origin)
- if not origin_url:
- return
-
- visit = self.origin_visit_get_latest(
- origin_url, allowed_statuses=allowed_statuses, require_snapshot=True
- )
- if visit and visit["snapshot"]:
- snapshot = self.snapshot_get(visit["snapshot"])
- if not snapshot:
- raise StorageArgumentException(
- "last origin visit references an unknown snapshot"
- )
- return snapshot
-
def snapshot_count_branches(self, snapshot_id):
snapshot = self._snapshots[snapshot_id]
return collections.Counter(
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -695,37 +695,6 @@
"""
...
- @remote_api_endpoint("snapshot/latest")
- def snapshot_get_latest(self, origin, allowed_statuses=None):
- """Get the content, possibly partial, of the latest snapshot for the
- given origin, optionally only from visits that have one of the given
- allowed_statuses
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
-
- Args:
- origin (str): the origin's URL
- allowed_statuses (list of str): list of visit statuses considered
- to find the latest snapshot for the visit. For instance,
- ``allowed_statuses=['full']`` will only consider visits that
- have successfully run to completion.
- Returns:
- dict: a dict with three keys:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
- """
- ...
-
@remote_api_endpoint("snapshot/count_branches")
def snapshot_count_branches(self, snapshot_id):
"""Count the number of branches in the snapshot with the given id
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -735,30 +735,6 @@
return None
- @timed
- @db_transaction(statement_timeout=4000)
- def snapshot_get_latest(self, origin, allowed_statuses=None, db=None, cur=None):
- if isinstance(origin, int):
- origin = self.origin_get({"id": origin}, db=db, cur=cur)
- if not origin:
- return
- origin = origin["url"]
-
- origin_visit = self.origin_visit_get_latest(
- origin,
- allowed_statuses=allowed_statuses,
- require_snapshot=True,
- db=db,
- cur=cur,
- )
- if origin_visit and origin_visit["snapshot"]:
- snapshot = self.snapshot_get(origin_visit["snapshot"], db=db, cur=cur)
- if not snapshot:
- raise StorageArgumentException(
- "last origin visit references an unknown snapshot"
- )
- return snapshot
-
@timed
@db_transaction(statement_timeout=2000)
def snapshot_count_branches(self, snapshot_id, db=None, cur=None):
diff --git a/swh/storage/tests/algos/test_snapshot.py b/swh/storage/tests/algos/test_snapshot.py
--- a/swh/storage/tests/algos/test_snapshot.py
+++ b/swh/storage/tests/algos/test_snapshot.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,10 +6,14 @@
from hypothesis import given
import pytest
-from swh.model.identifiers import snapshot_identifier, identifier_to_bytes
from swh.model.hypothesis_strategies import snapshots, branch_names, branch_targets
+from swh.model.identifiers import snapshot_identifier, identifier_to_bytes
+from swh.model.model import Origin, OriginVisit, OriginVisitStatus, Snapshot
+
+from swh.storage.algos.snapshot import snapshot_get_all_branches, snapshot_get_latest
+from swh.storage.utils import now
-from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.storage.tests.storage_data import data
@pytest.fixture
@@ -42,3 +46,106 @@
returned_snapshot = snapshot_get_all_branches(swh_storage, snapshot["id"])
assert snapshot == returned_snapshot
+
+
+def test_snapshot_get_latest_none(swh_storage):
+ """Retrieving the latest snapshot of an unknown origin, or of an origin
+ without snapshot, should yield no result
+
+ """
+ assert snapshot_get_latest(swh_storage, "unknown-origin") is None
+
+ # the origin visit has no snapshot, so nothing is found
+ origin = Origin.from_dict(data.origin)
+ swh_storage.origin_add_one(origin)
+ swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin.url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ ]
+ )
+ assert snapshot_get_latest(swh_storage, origin.url) is None
+
+
+def test_snapshot_get_latest(swh_storage):
+ origin = Origin.from_dict(data.origin)
+ swh_storage.origin_add_one(origin)
+
+ visit1 = OriginVisit(
+ origin=origin.url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ ov1 = swh_storage.origin_visit_add([visit1])[0]
+
+ # Store the snapshot; no visit status references it yet
+ complete_snapshot = Snapshot.from_dict(data.complete_snapshot)
+ swh_storage.snapshot_add([complete_snapshot])
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
+ date=data.date_visit2,
+ status="partial",
+ snapshot=None,
+ )
+ ]
+ )
+ assert data.date_visit1 < data.date_visit2
+
+ # no snapshot associated with the visit, so None
+ actual_snapshot = snapshot_get_latest(
+ swh_storage, origin.url, allowed_statuses=["partial"]
+ )
+ assert actual_snapshot is None
+
+ date_now = now()
+ assert data.date_visit2 < date_now
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
+ date=date_now,
+ status="full",
+ snapshot=complete_snapshot.id,
+ )
+ ]
+ )
+
+ actual_snapshot = snapshot_get_latest(swh_storage, origin.url)
+ assert actual_snapshot is not None
+ assert actual_snapshot == complete_snapshot
+
+ swh_storage.origin_visit_status_add(
+ [
+ OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
+ date=date_now,
+ status="full",
+ snapshot=complete_snapshot.id,
+ )
+ ]
+ )
+
+ actual_snapshot = snapshot_get_latest(swh_storage, origin.url)
+ assert actual_snapshot is not None
+ assert actual_snapshot == complete_snapshot
+
+ actual_snapshot = snapshot_get_latest(swh_storage, origin.url, branches_count=1)
+ assert actual_snapshot is not None
+ assert actual_snapshot.id == complete_snapshot.id
+ assert len(actual_snapshot.branches.values()) == 1
+
+ with pytest.raises(ValueError, match="branches_count must be a positive integer"):
+ snapshot_get_latest(swh_storage, origin.url, branches_count="something-wrong")
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -2863,213 +2863,6 @@
("origin_visit_status", OriginVisitStatus.from_dict(data4),),
]
- def test_snapshot_get_latest(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin)
- visit1 = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- visit2 = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- # Add a visit with the same date as the previous one
- visit3 = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit3,
- status="ongoing",
- snapshot=None,
- )
- ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
- # Two visits, both with no snapshot: latest snapshot is None
- assert swh_storage.snapshot_get_latest(origin_url) is None
-
- # Add snapshot to visit1, latest snapshot = visit 1 snapshot
- swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov1.visit,
- date=now(),
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
- )
- ]
- )
- assert {
- **data.complete_snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url)
-
- # Status filter: all three visits are status=ongoing, so no snapshot
- # returned
- assert (
- swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"])
- is None
- )
-
- # Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov1.visit,
- date=now(),
- status="full",
- snapshot=data.complete_snapshot["id"],
- )
- ]
- )
- assert {
- **data.complete_snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"])
-
- # Add snapshot to visit2 and check that the new snapshot is returned
- swh_storage.snapshot_add([data.empty_snapshot])
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov2.visit,
- date=now(),
- status="ongoing",
- snapshot=data.empty_snapshot["id"],
- )
- ]
- )
- assert {
- **data.empty_snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url)
-
- # Check that the status filter is still working
- assert {
- **data.complete_snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"])
-
- # Add snapshot to visit3 (same date as visit2) and check that
- # the new snapshot is returned
- swh_storage.snapshot_add([data.complete_snapshot])
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov3.visit,
- date=now(),
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
- )
- ]
- )
- assert {
- **data.complete_snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url)
-
- def test_snapshot_get_latest__missing_snapshot(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin)
- assert swh_storage.snapshot_get_latest(origin_url) is None
- visit1 = OriginVisit(
- origin=origin_url,
- date=data.date_visit1,
- type=data.type_visit1,
- status="ongoing",
- snapshot=None,
- )
- visit2 = OriginVisit(
- origin=origin_url,
- date=data.date_visit2,
- type=data.type_visit2,
- status="ongoing",
- snapshot=None,
- )
- ov1, ov2 = swh_storage.origin_visit_add([visit1, visit2])
-
- # Two visits, both with no snapshot: latest snapshot is None
- assert swh_storage.snapshot_get_latest(origin_url) is None
-
- # Add unknown snapshot to visit1, check that the inconsistency is
- # detected
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov1.visit,
- date=now(),
- status="ongoing",
- snapshot=data.complete_snapshot["id"],
- )
- ]
- )
- with pytest.raises(Exception):
- # XXX: should the exception be more specific than this?
- swh_storage.snapshot_get_latest(origin_url)
-
- # Status filter: both visits are status=ongoing, so no snapshot
- # returned
- assert (
- swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"])
- is None
- )
-
- # Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov1.visit,
- date=now(),
- status="full",
- snapshot=data.complete_snapshot["id"],
- )
- ]
- )
-
- with pytest.raises(Exception):
- # XXX: should the exception be more specific than this?
- swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"]),
-
- # Actually add the snapshot and check status filter again
- swh_storage.snapshot_add([data.complete_snapshot])
- assert {
- **data.complete_snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url)
-
- # Add unknown snapshot to visit2 and check that the inconsistency
- # is detected
- swh_storage.origin_visit_status_add(
- [
- OriginVisitStatus(
- origin=origin_url,
- visit=ov2.visit,
- date=now(),
- status="ongoing",
- snapshot=data.snapshot["id"],
- )
- ]
- )
- with pytest.raises(Exception):
- # XXX: should the exception be more specific than this?
- swh_storage.snapshot_get_latest(origin_url)
-
- # Actually add that snapshot and check that the new one is returned
- swh_storage.snapshot_add([data.snapshot])
- assert {
- **data.snapshot,
- "next_branch": None,
- } == swh_storage.snapshot_get_latest(origin_url)
-
def test_snapshot_get_random(self, swh_storage):
swh_storage.snapshot_add(
[data.snapshot, data.empty_snapshot, data.complete_snapshot]
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 12:58 PM (8 h, 5 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220536
Attached To
D3314: Move deprecated endpoint snapshot_get_latest from api endpoint to algos
Event Timeline
Log In to Comment