diff --git a/swh/storage/algos/origin.py b/swh/storage/algos/origin.py --- a/swh/storage/algos/origin.py +++ b/swh/storage/algos/origin.py @@ -3,21 +3,29 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Dict, Optional, Iterable, Tuple -from swh.model.model import OriginVisit, OriginVisitStatus +from typing import Any, Dict, Optional, Iterable, Iterator, Tuple +from swh.model.model import Origin, OriginVisit, OriginVisitStatus +from swh.storage.interface import StorageInterface -def iter_origins(storage, origin_from=1, origin_to=None, batch_size=10000): + +def iter_origins( + storage: StorageInterface, + origin_from: int = 1, + origin_to: Optional[int] = None, + batch_size: int = 10000, +) -> Iterator[Origin]: """Iterates over all origins in the storage. Args: storage: the storage object used for queries. + origin_from: lower interval boundary + origin_to: upper interval boundary batch_size: number of origins per query + Yields: - dict: the origin dictionary with the keys: + origins between origin_from and origin_to, fetched in batches of batch_size - - type: origin's type - - url: origin's url """ start = origin_from while True: @@ -33,7 +41,7 @@ start = origins[-1]["id"] + 1 for origin in origins: del origin["id"] - yield origin + yield Origin.from_dict(origin) if origin_to and start > origin_to: break diff --git a/swh/storage/tests/algos/test_origin.py b/swh/storage/tests/algos/test_origin.py --- a/swh/storage/tests/algos/test_origin.py +++ b/swh/storage/tests/algos/test_origin.py @@ -22,14 +22,16 @@ @pytest.fixture def swh_storage_backend_config(): - yield {"cls": "validate", "storage": {"cls": "memory",}} + yield { + "cls": "memory", + } def test_iter_origins(swh_storage): origins = [ - {"url": "bar"}, - {"url": "qux"}, - {"url": "quuz"}, + Origin(url="bar"), + Origin(url="qux"), + Origin(url="quuz"), ] assert swh_storage.origin_add(origins) == {"origin:add": 3} 
assert_list_eq(iter_origins(swh_storage), origins) @@ -82,7 +84,7 @@ mock_origin_get_range.assert_called_with(origin_from=1, origin_count=42) -def test_origin_get_latest_visit_status_none(swh_storage): +def test_origin_get_latest_visit_status_none(swh_storage, sample_data_model): """Looking up unknown objects should return nothing """ @@ -90,11 +92,13 @@ assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None # unknown type so no result - origin = Origin.from_dict(data.origin) + origin = sample_data_model["origin"][0] + origin_visit = sample_data_model["origin_visit"][0] + assert origin_visit.origin == origin.url + swh_storage.origin_add_one(origin) - swh_storage.origin_visit_add( - [OriginVisit(origin=origin.url, date=data.date_visit1, type="git",),] - )[0] + swh_storage.origin_visit_add([origin_visit])[0] + assert origin_visit.type != "unknown" actual_origin_visit = origin_get_latest_visit_status( swh_storage, origin.url, type="unknown" )