diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -844,15 +844,37 @@
     def origin_visit_get(
         self,
         origin: str,
-        last_visit: Optional[int] = None,
-        limit: Optional[int] = None,
+        page_token: Optional[str] = None,
         order: str = "asc",
-    ) -> Iterable[Dict[str, Any]]:
-        rows = self._cql_runner.origin_visit_get(origin, last_visit, limit, order)
+        limit: int = 10,
+    ) -> Dict[str, Any]:
+        result: Dict[str, Any] = {}
+        order = order.lower()
+        allowed_orders = ["asc", "desc"]
+        if order not in allowed_orders:
+            raise StorageArgumentException(
+                f"order must be one of {', '.join(allowed_orders)}."
+            )
+        if page_token and not isinstance(page_token, str):
+            raise StorageArgumentException("page_token must be a string.")
+        visit_from = page_token and int(page_token)
+        visits: List[OriginVisit] = []
+        extra_limit = limit + 1
+        rows = self._cql_runner.origin_visit_get(origin, visit_from, extra_limit, order)
         for row in rows:
-            visit = self._format_origin_visit_row(row)
-            yield self._origin_visit_apply_last_status(visit)
+            visits.append(converters.row_to_visit(row))
+
+        assert len(visits) <= extra_limit
+        if len(visits) == extra_limit:
+            last_visit = visits[limit]
+            visits = visits[:limit]
+            assert last_visit is not None
+            result["next_page_token"] = str(last_visit.visit)
+
+        if visits:
+            result["visits"] = visits
+        return result

     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -481,6 +481,8 @@
             + [jsonize(visit_status.metadata)],
         )

+    origin_visit_cols = ["origin", "visit", "date", "type"]
+
     def origin_visit_add_with_id(self, origin_visit: OriginVisit, cur=None) -> None:
         """Insert origin visit when id are already set

@@ -488,12 +490,11 @@
         ov = origin_visit
         assert ov.visit is not None
         cur = self._cursor(cur)
-        origin_visit_cols = ["origin", "visit", "date", "type"]
         query = """INSERT INTO origin_visit ({cols})
                    VALUES ((select id from origin where url=%s), {values})
                    ON CONFLICT (origin, visit) DO NOTHING""".format(
-            cols=", ".join(origin_visit_cols),
-            values=", ".join("%s" for col in origin_visit_cols[1:]),
+            cols=", ".join(self.origin_visit_cols),
+            values=", ".join("%s" for col in self.origin_visit_cols[1:]),
         )
         cur.execute(query, (ov.origin, ov.visit, ov.date, ov.type))

@@ -618,6 +619,42 @@
         cur.execute(query, tuple(query_params))
         yield from cur

+    def origin_visit_get_range(
+        self,
+        origin: str,
+        visit_from: int = 0,
+        order: str = "asc",
+        limit: int = 10,
+        cur=None,
+    ):
+        assert order.lower() in ["asc", "desc"]
+        cur = self._cursor(cur)
+
+        origin_visit_cols = ["o.url as origin", "ov.visit", "ov.date", "ov.type"]
+        query_parts = [
+            f"SELECT {', '.join(origin_visit_cols)} " "FROM origin_visit ov ",
+            "INNER JOIN origin o ON o.id = ov.origin ",
+        ]
+        query_parts.append("WHERE o.url = %s")
+        query_params: List[Any] = [origin]
+
+        if visit_from > 0:
+            op_comparison = ">" if order == "asc" else "<"
+            query_parts.append(f"and ov.visit {op_comparison} %s")
+            query_params.append(visit_from)
+
+        if order == "asc":
+            query_parts.append("ORDER BY ov.visit ASC")
+        elif order == "desc":
+            query_parts.append("ORDER BY ov.visit DESC")
+
+        query_parts.append("LIMIT %s")
+        query_params.append(limit)
+
+        query = "\n".join(query_parts)
+        cur.execute(query, tuple(query_params))
+        yield from cur
+
     def origin_visit_get(self, origin_id, visit_id, cur=None):
         """Retrieve information on visit visit_id of origin origin_id.
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -862,31 +862,48 @@
     def origin_visit_get(
         self,
         origin: str,
-        last_visit: Optional[int] = None,
-        limit: Optional[int] = None,
+        page_token: Optional[str] = None,
         order: str = "asc",
-    ) -> Iterable[Dict[str, Any]]:
+        limit: int = 10,
+    ) -> Dict[str, Any]:
+        result: Dict[str, Any] = {}
+        page_token = page_token or "0"
         order = order.lower()
-        assert order in ["asc", "desc"]
-        origin_url = self._get_origin_url(origin)
-        if origin_url in self._origin_visits:
-            visits = self._origin_visits[origin_url]
-            visits = sorted(visits, key=lambda v: v.visit, reverse=(order == "desc"))
-            if last_visit is not None:
-                if order == "asc":
-                    visits = [v for v in visits if v.visit > last_visit]
-                else:
-                    visits = [v for v in visits if v.visit < last_visit]
-            if limit is not None:
-                visits = visits[:limit]
-            for visit in visits:
-                if not visit:
-                    continue
-                visit_id = visit.visit
+        allowed_orders = ["asc", "desc"]
+        if order not in allowed_orders:
+            raise StorageArgumentException(
+                f"order must be one of {', '.join(allowed_orders)}."
+            )
+        if not isinstance(page_token, str):
+            raise StorageArgumentException("page_token must be a string.")

-                visit_update = self._origin_visit_get_updated(origin_url, visit_id)
-                assert visit_update is not None
-                yield visit_update
+        visit_from = int(page_token)
+        origin_url = self._get_origin_url(origin)
+        extra_limit = limit + 1
+        visits = sorted(
+            self._origin_visits.get(origin_url, []),
+            key=lambda v: v.visit,
+            reverse=(order == "desc"),
+        )
+        if not visits:
+            return result
+
+        if visit_from > 0 and order == "asc":
+            visits = [v for v in visits if v.visit > visit_from]
+        elif visit_from > 0 and order == "desc":
+            visits = [v for v in visits if v.visit < visit_from]
+        visits = [v for v in visits if v is not None][:extra_limit]
+
+        assert len(visits) <= extra_limit
+        if len(visits) == extra_limit:
+            last_visit = visits[limit]
+            visits = visits[:limit]
+            assert last_visit is not None
+            result["next_page_token"] = str(last_visit.visit)
+
+        if visits:
+            result["visits"] = visits
+        return result

     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -793,26 +793,55 @@
     def origin_visit_get(
         self,
         origin: str,
-        last_visit: Optional[int] = None,
-        limit: Optional[int] = None,
+        page_token: Optional[str] = None,
         order: str = "asc",
-    ) -> Iterable[Dict[str, Any]]:
-        """Retrieve all the origin's visit's information.
+        limit: int = 10,
+    ) -> Dict[str, Any]:
+        """Retrieve OriginVisit information.

         Args:
             origin: The visited origin
-            last_visit: Starting point from which listing the next visits
-                Default to None
-            limit: Number of results to return from the last visit.
-                Default to None
+            page_token: opaque string used to get the next page of results
             order: Order on visit id fields to list origin visits (default to asc)
+            limit: Number of visits to return

-        Yields:
-            List of visits.
+        Raises:
+            StorageArgumentException: if order is invalid or page_token is not a string
+
+        Returns:
+            dict: dict with the following keys:
+                - **next_page_token** (str, optional): opaque token to be used as
+                  `page_token` to retrieve the next page. If absent, there are
+                  no more pages to gather.
+                - **visits** (Iterable[OriginVisit]): list of visits

        """
        ...
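A minimal usage sketch for the paginated endpoint documented above (the `storage` handle and the `iter_origin_visits` helper are illustrative, not part of this patch): callers keep following `next_page_token` until it is absent.

# Illustrative only (not part of this patch): drain all visits of an origin
# by following next_page_token across successive origin_visit_get calls.
def iter_origin_visits(storage, origin_url, order="asc", limit=10):
    page_token = None
    while True:
        page = storage.origin_visit_get(
            origin_url, page_token=page_token, order=order, limit=limit
        )
        yield from page.get("visits", [])
        page_token = page.get("next_page_token")
        if page_token is None:
            break

Each backend fetches limit + 1 rows; when the extra row is present, its visit id is exposed as the opaque next_page_token, otherwise the key is omitted.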
+    # @remote_api_endpoint("origin/visit_status/get")
+    # def origin_visit_status_get(
+    #     self,
+    #     origin: str,
+    #     last_visit: Optional[int] = None,
+    #     limit: Optional[int] = None,
+    #     order: str = "asc",
+    # ) -> Iterable[Optional[OriginVisitStatus]]:
+    #     """Retrieve OriginVisit information.
+
+    #     Args:
+    #         origin: The visited origin
+    #         last_visit: Starting point from which listing the next visits
+    #             Default to None
+    #         limit: Number of results to return from the last visit.
+    #             Default to None
+    #         order: Order on visit id fields to list origin visits (default to asc)
+
+    #     Yields:
+    #         List of optional (when not found) visits
+
+    #     """
+    #     ...
+
     @remote_api_endpoint("origin/visit/find_by_date")
     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -877,22 +877,54 @@
         return OriginVisitStatus.from_dict(row)

     @timed
-    @db_transaction_generator(statement_timeout=500)
+    @db_transaction(statement_timeout=500)
     def origin_visit_get(
         self,
         origin: str,
-        last_visit: Optional[int] = None,
-        limit: Optional[int] = None,
+        page_token: Optional[str] = None,
         order: str = "asc",
+        limit: int = 10,
         db=None,
         cur=None,
-    ) -> Iterable[Dict[str, Any]]:
-        assert order in ["asc", "desc"]
-        lines = db.origin_visit_get_all(
-            origin, last_visit=last_visit, limit=limit, order=order, cur=cur
-        )
-        for line in lines:
-            yield dict(zip(db.origin_visit_get_cols, line))
+    ) -> Dict[str, Any]:
+        result: Dict[str, Any] = {}
+        page_token = page_token or "0"
+        order = order.lower()
+        allowed_orders = ["asc", "desc"]
+        if order not in allowed_orders:
+            raise StorageArgumentException(
+                f"order must be one of {', '.join(allowed_orders)}."
+            )
+        if not isinstance(page_token, str):
+            raise StorageArgumentException("page_token must be a string.")
+
+        visit_from = int(page_token)
+        visits: List[OriginVisit] = []
+        extra_limit = limit + 1
+        for row in db.origin_visit_get_range(
+            origin, visit_from=visit_from, order=order, limit=extra_limit, cur=cur
+        ):
+            row_d = dict(zip(db.origin_visit_cols, row))
+            visits.append(
+                OriginVisit(
+                    origin=row_d["origin"],
+                    visit=int(row_d["visit"]),
+                    date=row_d["date"],
+                    type=row_d["type"],
+                )
+            )
+
+        assert len(visits) <= extra_limit
+
+        if len(visits) == extra_limit:
+            last_visit = visits[limit]
+            visits = visits[:limit]
+            assert last_visit is not None
+            result["next_page_token"] = str(last_visit.visit)
+
+        if visits:
+            result["visits"] = visits
+        return result

     @timed
     @db_transaction(statement_timeout=500)
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -159,10 +159,10 @@
         cont = sample_data.content

         insertion_start_time = now()
-        actual_result = swh_storage.content_add([cont])
+        result = swh_storage.content_add([cont])
         insertion_end_time = now()

-        assert actual_result == {
+        assert result == {
             "content:add": 1,
             "content:add:bytes": cont.length,
         }
@@ -193,9 +193,9 @@
         def _cnt_gen():
             yield cont

-        actual_result = swh_storage.content_add(_cnt_gen())
+        result = swh_storage.content_add(_cnt_gen())

-        assert actual_result == {
+        assert result == {
             "content:add": 1,
             "content:add:bytes": cont.length,
         }
@@ -209,11 +209,11 @@

         insertion_start_time = now()

-        actual_result = swh_storage.content_add([lazy_content])
+        result = swh_storage.content_add([lazy_content])

         insertion_end_time = now()

-        assert actual_result == {
+        assert result == {
             "content:add": 1,
             "content:add:bytes": cont.length,
         }
@@ -260,8 +260,8 @@
     def test_content_add_different_input(self, swh_storage, sample_data):
         cont, cont2 = sample_data.contents[:2]

-        actual_result = swh_storage.content_add([cont, cont2])
-        assert actual_result == {
+        result = swh_storage.content_add([cont, cont2])
+        assert result == {
             "content:add": 2,
             "content:add:bytes": cont.length + cont2.length,
         }
@@ -269,15 +269,15 @@
     def test_content_add_twice(self, swh_storage, sample_data):
         cont, cont2 = sample_data.contents[:2]

-        actual_result = swh_storage.content_add([cont])
-        assert actual_result == {
+        result = swh_storage.content_add([cont])
+        assert result == {
             "content:add": 1,
             "content:add:bytes": cont.length,
         }
         assert len(swh_storage.journal_writer.journal.objects) == 1

-        actual_result = swh_storage.content_add([cont, cont2])
-        assert actual_result == {
+        result = swh_storage.content_add([cont, cont2])
+        assert result == {
             "content:add": 1,
             "content:add:bytes": cont2.length,
         }
@@ -345,8 +345,8 @@
     def test_content_add_metadata(self, swh_storage, sample_data):
         cont = attr.evolve(sample_data.content, data=None, ctime=now())

-        actual_result = swh_storage.content_add_metadata([cont])
-        assert actual_result == {
+        result = swh_storage.content_add_metadata([cont])
+        assert result == {
             "content:add": 1,
         }

@@ -371,8 +371,8 @@
         cont = attr.evolve(contents[0], data=None, ctime=now())
         cont2 = attr.evolve(contents[1], data=None, ctime=now())

-        actual_result = swh_storage.content_add_metadata([cont, cont2])
-        assert actual_result == {
+        result = swh_storage.content_add_metadata([cont, cont2])
+        assert result == {
             "content:add": 2,
         }

@@ -414,10 +414,10 @@
         assert missing == [cont.hashes(), cont2.hashes()]

-        actual_result = swh_storage.skipped_content_add([cont,
cont, cont2]) + result = swh_storage.skipped_content_add([cont, cont, cont2]) - assert 2 <= actual_result.pop("skipped_content:add") <= 3 - assert actual_result == {} + assert 2 <= result.pop("skipped_content:add") <= 3 + assert result == {} missing = list(swh_storage.skipped_content_missing(contents_dict)) assert missing == [] @@ -431,10 +431,10 @@ missing = list(swh_storage.skipped_content_missing(contents_dict)) assert len(missing) == 2 - actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) + result = swh_storage.skipped_content_add([cont, cont, cont2]) - assert 2 <= actual_result.pop("skipped_content:add") <= 3 - assert actual_result == {} + assert 2 <= result.pop("skipped_content:add") <= 3 + assert result == {} missing = list(swh_storage.skipped_content_missing(contents_dict)) assert missing == [] @@ -447,10 +447,10 @@ missing = list(swh_storage.skipped_content_missing(contents_dict)) assert len(missing) == 2 - actual_result = swh_storage.skipped_content_add([cont]) + result = swh_storage.skipped_content_add([cont]) - assert actual_result.pop("skipped_content:add") == 1 - assert actual_result == {} + assert result.pop("skipped_content:add") == 1 + assert result == {} missing = list(swh_storage.skipped_content_missing(contents_dict)) assert missing == [cont2.hashes()] @@ -545,9 +545,9 @@ actual_contents = [] for i in range(16): - actual_result = swh_storage.content_get_partition(i, 16) - assert actual_result["next_page_token"] is None - actual_contents.extend(actual_result["contents"]) + result = swh_storage.content_get_partition(i, 16) + assert result["next_page_token"] is None + actual_contents.extend(result["contents"]) assert_contents_ok(expected_contents, actual_contents, ["sha1"]) @@ -556,10 +556,10 @@ contents""" expected_contents = [c.to_dict() for c in swh_contents if c.status != "absent"] - actual_result = swh_storage.content_get_partition(0, 1) - assert actual_result["next_page_token"] is None + result = swh_storage.content_get_partition(0, 1) + assert result["next_page_token"] is None - actual_contents = actual_result["contents"] + actual_contents = result["contents"] assert_contents_ok(expected_contents, actual_contents, ["sha1"]) def test_content_get_partition_empty(self, swh_storage, swh_contents): @@ -575,15 +575,15 @@ seen_sha1s = [] for i in range(nb_partitions): - actual_result = swh_storage.content_get_partition( + result = swh_storage.content_get_partition( i, nb_partitions, limit=len(swh_contents) + 1 ) - for cont in actual_result["contents"]: + for cont in result["contents"]: seen_sha1s.append(cont["sha1"]) # Limit is higher than the max number of results - assert actual_result["next_page_token"] is None + assert result["next_page_token"] is None assert set(seen_sha1s) == expected_contents @@ -603,11 +603,11 @@ for i in range(4): page_token = None while True: - actual_result = swh_storage.content_get_partition( + result = swh_storage.content_get_partition( i, 4, limit=3, page_token=page_token ) - actual_contents.extend(actual_result["contents"]) - page_token = actual_result["next_page_token"] + actual_contents.extend(result["contents"]) + page_token = result["next_page_token"] if page_token is None: break @@ -659,8 +659,8 @@ init_missing = list(swh_storage.directory_missing([directory.id])) assert [directory.id] == init_missing - actual_result = swh_storage.directory_add([directory]) - assert actual_result == {"directory:add": 1} + result = swh_storage.directory_add([directory]) + assert result == {"directory:add": 1} assert 
list(swh_storage.journal_writer.journal.objects) == [ ("directory", directory) @@ -683,8 +683,8 @@ def _dir_gen(): yield directory - actual_result = swh_storage.directory_add(directories=_dir_gen()) - assert actual_result == {"directory:add": 1} + result = swh_storage.directory_add(directories=_dir_gen()) + assert result == {"directory:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("directory", directory) @@ -696,15 +696,15 @@ def test_directory_add_twice(self, swh_storage, sample_data): directory = sample_data.directories[1] - actual_result = swh_storage.directory_add([directory]) - assert actual_result == {"directory:add": 1} + result = swh_storage.directory_add([directory]) + assert result == {"directory:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("directory", directory) ] - actual_result = swh_storage.directory_add([directory]) - assert actual_result == {"directory:add": 0} + result = swh_storage.directory_add([directory]) + assert result == {"directory:add": 0} assert list(swh_storage.journal_writer.journal.objects) == [ ("directory", directory) @@ -716,8 +716,8 @@ init_missing = list(swh_storage.directory_missing([dir1.id])) assert init_missing == [dir1.id] - actual_result = swh_storage.directory_add([dir1, dir2, dir3]) - assert actual_result == {"directory:add": 3} + result = swh_storage.directory_add([dir1, dir2, dir3]) + assert result == {"directory:add": 3} assert list(swh_storage.journal_writer.journal.objects) == [ ("directory", dir1), @@ -751,8 +751,8 @@ init_missing = list(swh_storage.directory_missing([dir1.id])) assert init_missing == [dir1.id] - actual_result = swh_storage.directory_add([dir1, dir2, dir3]) - assert actual_result == {"directory:add": 3} + result = swh_storage.directory_add([dir1, dir2, dir3]) + assert result == {"directory:add": 3} assert list(swh_storage.journal_writer.journal.objects) == [ ("directory", dir1), @@ -784,8 +784,8 @@ init_missing = list(swh_storage.directory_missing([dir3.id])) assert init_missing == [dir3.id] - actual_result = swh_storage.directory_add([dir3, dir4]) - assert actual_result == {"directory:add": 2} + result = swh_storage.directory_add([dir3, dir4]) + assert result == {"directory:add": 2} expected_entries = [ { @@ -864,8 +864,8 @@ init_missing = swh_storage.revision_missing([revision.id]) assert list(init_missing) == [revision.id] - actual_result = swh_storage.revision_add([revision]) - assert actual_result == {"revision:add": 1} + result = swh_storage.revision_add([revision]) + assert result == {"revision:add": 1} end_missing = swh_storage.revision_missing([revision.id]) assert list(end_missing) == [] @@ -875,8 +875,8 @@ ] # already there so nothing added - actual_result = swh_storage.revision_add([revision]) - assert actual_result == {"revision:add": 0} + result = swh_storage.revision_add([revision]) + assert result == {"revision:add": 0} swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()["revision"] == 1 @@ -887,8 +887,8 @@ def _rev_gen(): yield revision - actual_result = swh_storage.revision_add(_rev_gen()) - assert actual_result == {"revision:add": 1} + result = swh_storage.revision_add(_rev_gen()) + assert result == {"revision:add": 1} swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()["revision"] == 1 @@ -896,15 +896,15 @@ def test_revision_add_twice(self, swh_storage, sample_data): revision, revision2 = sample_data.revisions[:2] - actual_result = swh_storage.revision_add([revision]) - assert actual_result == {"revision:add": 1} + 
result = swh_storage.revision_add([revision]) + assert result == {"revision:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("revision", revision) ] - actual_result = swh_storage.revision_add([revision, revision2]) - assert actual_result == {"revision:add": 1} + result = swh_storage.revision_add([revision, revision2]) + assert result == {"revision:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("revision", revision), @@ -930,8 +930,8 @@ email=b"john.doe@example.com ", ), ) - actual_result = swh_storage.revision_add([revision1, revision2]) - assert actual_result == {"revision:add": 2} + result = swh_storage.revision_add([revision1, revision2]) + assert result == {"revision:add": 2} def test_revision_get_order(self, swh_storage, sample_data): revision, revision2 = sample_data.revisions[:2] @@ -1044,8 +1044,8 @@ init_missing = swh_storage.release_missing([release.id, release2.id]) assert list(init_missing) == [release.id, release2.id] - actual_result = swh_storage.release_add([release, release2]) - assert actual_result == {"release:add": 2} + result = swh_storage.release_add([release, release2]) + assert result == {"release:add": 2} end_missing = swh_storage.release_missing([release.id, release2.id]) assert list(end_missing) == [] @@ -1056,8 +1056,8 @@ ] # already present so nothing added - actual_result = swh_storage.release_add([release, release2]) - assert actual_result == {"release:add": 0} + result = swh_storage.release_add([release, release2]) + assert result == {"release:add": 0} swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()["release"] == 2 @@ -1069,8 +1069,8 @@ yield release yield release2 - actual_result = swh_storage.release_add(_rel_gen()) - assert actual_result == {"release:add": 2} + result = swh_storage.release_add(_rel_gen()) + assert result == {"release:add": 2} assert list(swh_storage.journal_writer.journal.objects) == [ ("release", release), @@ -1084,8 +1084,8 @@ full_release = sample_data.release release = attr.evolve(full_release, author=None, date=None) - actual_result = swh_storage.release_add([release]) - assert actual_result == {"release:add": 1} + result = swh_storage.release_add([release]) + assert result == {"release:add": 1} end_missing = swh_storage.release_missing([release.id]) assert list(end_missing) == [] @@ -1097,15 +1097,15 @@ def test_release_add_twice(self, swh_storage, sample_data): release, release2 = sample_data.releases[:2] - actual_result = swh_storage.release_add([release]) - assert actual_result == {"release:add": 1} + result = swh_storage.release_add([release]) + assert result == {"release:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("release", release) ] - actual_result = swh_storage.release_add([release, release2, release, release2]) - assert actual_result == {"release:add": 1} + result = swh_storage.release_add([release, release2, release, release2]) + assert result == {"release:add": 1} assert set(swh_storage.journal_writer.journal.objects) == set( [("release", release), ("release", release2),] @@ -1124,8 +1124,8 @@ for c in sample_data.releases[:2] ] - actual_result = swh_storage.release_add([release, release2]) - assert actual_result == {"release:add": 2} + result = swh_storage.release_add([release, release2]) + assert result == {"release:add": 2} def test_release_get(self, swh_storage, sample_data): release, release2, release3 = sample_data.releases[:3] @@ -1253,10 +1253,27 @@ visits.append(date_visit) return visits + def 
test_origin_visit_get__unknown_origin(self, swh_storage): + actual_visit = swh_storage.origin_visit_get("foo") + assert actual_visit == {} + + def test_origin_visit_get__validation_failure(self, swh_storage, sample_data): + origin = sample_data.origin + swh_storage.origin_add([origin]) + with pytest.raises( + StorageArgumentException, match="page_token must be a string" + ): + swh_storage.origin_visit_get(origin.url, page_token=10) # not a string + + with pytest.raises( + StorageArgumentException, match="order must be one of asc, desc" + ): + swh_storage.origin_visit_get(origin.url, order="foobar") # wrong order + def test_origin_visit_get_all(self, swh_storage, sample_data): origin = sample_data.origin swh_storage.origin_add([origin]) - visits = swh_storage.origin_visit_add( + ov1, ov2, ov3 = swh_storage.origin_visit_add( [ OriginVisit( origin=origin.url, @@ -1275,59 +1292,52 @@ ), ] ) - ov1, ov2, ov3 = [ - {**v.to_dict(), "status": "created", "snapshot": None, "metadata": None,} - for v in visits - ] # order asc, no pagination, no limit - all_visits = list(swh_storage.origin_visit_get(origin.url)) - assert all_visits == [ov1, ov2, ov3] + result = swh_storage.origin_visit_get(origin.url) + assert result.get("next_page_token") is None + assert result["visits"] == [ov1, ov2, ov3] # order asc, no pagination, limit - all_visits2 = list(swh_storage.origin_visit_get(origin.url, limit=2)) - assert all_visits2 == [ov1, ov2] + result = swh_storage.origin_visit_get(origin.url, limit=2) + assert result["next_page_token"] == str(ov3.visit) + assert result["visits"] == [ov1, ov2] # order asc, pagination, no limit - all_visits3 = list( - swh_storage.origin_visit_get(origin.url, last_visit=ov1["visit"]) - ) - assert all_visits3 == [ov2, ov3] + result = swh_storage.origin_visit_get(origin.url, page_token=str(ov1.visit)) + assert result.get("next_page_token") is None + assert result["visits"] == [ov2, ov3] # order asc, pagination, limit - all_visits4 = list( - swh_storage.origin_visit_get(origin.url, last_visit=ov2["visit"], limit=1) + result = swh_storage.origin_visit_get( + origin.url, page_token=str(ov2.visit), limit=1 ) - assert all_visits4 == [ov3] + assert result.get("next_page_token") is None + assert result["visits"] == [ov3] # order desc, no pagination, no limit - all_visits5 = list(swh_storage.origin_visit_get(origin.url, order="desc")) - assert all_visits5 == [ov3, ov2, ov1] + result = swh_storage.origin_visit_get(origin.url, order="desc") + assert result.get("next_page_token") is None + assert result["visits"] == [ov3, ov2, ov1] # order desc, no pagination, limit - all_visits6 = list( - swh_storage.origin_visit_get(origin.url, limit=2, order="desc") - ) - assert all_visits6 == [ov3, ov2] + result = swh_storage.origin_visit_get(origin.url, limit=2, order="desc") + assert result["next_page_token"] == str(ov1.visit) + assert result["visits"] == [ov3, ov2] # order desc, pagination, no limit - all_visits7 = list( - swh_storage.origin_visit_get( - origin.url, last_visit=ov3["visit"], order="desc" - ) + result = swh_storage.origin_visit_get( + origin.url, page_token=str(ov3.visit), order="desc" ) - assert all_visits7 == [ov2, ov1] + assert result.get("next_page_token") is None + assert result["visits"] == [ov2, ov1] # order desc, pagination, limit - all_visits8 = list( - swh_storage.origin_visit_get( - origin.url, last_visit=ov3["visit"], order="desc", limit=1 - ) + result = swh_storage.origin_visit_get( + origin.url, page_token=str(ov3.visit), order="desc", limit=1 ) - assert all_visits8 == [ov2] - 
- def test_origin_visit_get__unknown_origin(self, swh_storage): - assert [] == list(swh_storage.origin_visit_get("foo")) + assert result["next_page_token"] == str(ov1.visit) + assert result["visits"] == [ov2] def test_origin_visit_status_get_random(self, swh_storage, sample_data): origins = sample_data.origins[:2] @@ -2251,8 +2261,8 @@ ] )[0] - actual_result = swh_storage.snapshot_add([empty_snapshot]) - assert actual_result == {"snapshot:add": 1} + result = swh_storage.snapshot_add([empty_snapshot]) + assert result == {"snapshot:add": 1} date_now = now() @@ -2320,7 +2330,7 @@ origin_visit1 = swh_storage.origin_visit_add([visit])[0] visit_id = origin_visit1.visit - actual_result = swh_storage.snapshot_add([complete_snapshot]) + result = swh_storage.snapshot_add([complete_snapshot]) swh_storage.origin_visit_status_add( [ OriginVisitStatus( @@ -2332,7 +2342,7 @@ ) ] ) - assert actual_result == {"snapshot:add": 1} + assert result == {"snapshot:add": 1} by_id = swh_storage.snapshot_get(complete_snapshot.id) assert by_id == {**complete_snapshot_dict, "next_branch": None} @@ -2343,8 +2353,8 @@ def test_snapshot_add_many(self, swh_storage, sample_data): snapshot, _, complete_snapshot = sample_data.snapshots[:3] - actual_result = swh_storage.snapshot_add([snapshot, complete_snapshot]) - assert actual_result == {"snapshot:add": 2} + result = swh_storage.snapshot_add([snapshot, complete_snapshot]) + assert result == {"snapshot:add": 2} assert swh_storage.snapshot_get(complete_snapshot.id) == { **complete_snapshot.to_dict(), @@ -2365,8 +2375,8 @@ def _snp_gen(): yield from [snapshot, complete_snapshot] - actual_result = swh_storage.snapshot_add(_snp_gen()) - assert actual_result == {"snapshot:add": 2} + result = swh_storage.snapshot_add(_snp_gen()) + assert result == {"snapshot:add": 2} swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()["snapshot"] == 2 @@ -2374,8 +2384,8 @@ def test_snapshot_add_many_incremental(self, swh_storage, sample_data): snapshot, _, complete_snapshot = sample_data.snapshots[:3] - actual_result = swh_storage.snapshot_add([complete_snapshot]) - assert actual_result == {"snapshot:add": 1} + result = swh_storage.snapshot_add([complete_snapshot]) + assert result == {"snapshot:add": 1} actual_result2 = swh_storage.snapshot_add([snapshot, complete_snapshot]) assert actual_result2 == {"snapshot:add": 1} @@ -2393,15 +2403,15 @@ def test_snapshot_add_twice(self, swh_storage, sample_data): snapshot, empty_snapshot = sample_data.snapshots[:2] - actual_result = swh_storage.snapshot_add([empty_snapshot]) - assert actual_result == {"snapshot:add": 1} + result = swh_storage.snapshot_add([empty_snapshot]) + assert result == {"snapshot:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("snapshot", empty_snapshot) ] - actual_result = swh_storage.snapshot_add([snapshot]) - assert actual_result == {"snapshot:add": 1} + result = swh_storage.snapshot_add([snapshot]) + assert result == {"snapshot:add": 1} assert list(swh_storage.journal_writer.journal.objects) == [ ("snapshot", empty_snapshot), @@ -2411,8 +2421,8 @@ def test_snapshot_add_count_branches(self, swh_storage, sample_data): complete_snapshot = sample_data.snapshots[2] - actual_result = swh_storage.snapshot_add([complete_snapshot]) - assert actual_result == {"snapshot:add": 1} + result = swh_storage.snapshot_add([complete_snapshot]) + assert result == {"snapshot:add": 1} snp_size = swh_storage.snapshot_count_branches(complete_snapshot.id) @@ -2952,7 +2962,7 @@ # Inject the data 
        swh_storage.content_add([content, duplicated_content])

-        actual_result = list(
+        result = list(
             swh_storage.content_find(
                 {
                     "blake2s256": duplicated_content.blake2s256,
@@ -2968,14 +2978,14 @@
             for dict_ in [
                 expected_content,
                 expected_duplicated_content,
-                actual_result[0],
-                actual_result[1],
+                result[0],
+                result[1],
             ]:
                 dict_.pop(key, None)

         expected_result = [expected_content, expected_duplicated_content]
-        for result in expected_result:
-            assert result in actual_result
+        for expected in expected_result:
+            assert expected in result

     def test_content_find_with_duplicate_sha256(self, swh_storage, sample_data):
         content = sample_data.content
@@ -2995,11 +3005,9 @@
         )

         swh_storage.content_add([content, duplicated_content])
-        actual_result = list(
-            swh_storage.content_find({"sha256": duplicated_content.sha256})
-        )
+        result = list(swh_storage.content_find({"sha256": duplicated_content.sha256}))

-        assert len(actual_result) == 2
+        assert len(result) == 2

         expected_content = content.to_dict()
         expected_duplicated_content = duplicated_content.to_dict()
@@ -3008,18 +3016,18 @@
             for dict_ in [
                 expected_content,
                 expected_duplicated_content,
-                actual_result[0],
-                actual_result[1],
+                result[0],
+                result[1],
             ]:
                 dict_.pop(key, None)

-        assert sorted(actual_result, key=lambda x: x["sha1"]) == [
+        assert sorted(result, key=lambda x: x["sha1"]) == [
             expected_content,
             expected_duplicated_content,
         ]

         # Find with both sha256 and blake2s256
-        actual_result = list(
+        result = list(
             swh_storage.content_find(
                 {
                     "sha256": duplicated_content.sha256,
@@ -3028,10 +3036,10 @@
             )
         )

-        assert len(actual_result) == 1
-        actual_result[0].pop("ctime")
+        assert len(result) == 1
+        result[0].pop("ctime")

-        assert actual_result == [expected_duplicated_content]
+        assert result == [expected_duplicated_content]

     def test_content_find_with_duplicate_blake2s256(self, swh_storage, sample_data):
         content = sample_data.content
@@ -3053,7 +3061,7 @@

         swh_storage.content_add([content, duplicated_content])

-        actual_result = list(
+        result = list(
             swh_storage.content_find({"blake2s256": duplicated_content.blake2s256})
         )

@@ -3064,17 +3072,17 @@
             for dict_ in [
                 expected_content,
                 expected_duplicated_content,
-                actual_result[0],
-                actual_result[1],
+                result[0],
+                result[1],
             ]:
                 dict_.pop(key, None)

         expected_result = [expected_content, expected_duplicated_content]
-        for result in expected_result:
-            assert result in actual_result
+        for expected in expected_result:
+            assert expected in result

         # Find with both sha256 and blake2s256
-        actual_result = list(
+        result = list(
             swh_storage.content_find(
                 {
                     "sha256": duplicated_content.sha256,
@@ -3083,8 +3091,8 @@
             )
         )

-        actual_result[0].pop("ctime")
-        assert actual_result == [expected_duplicated_content]
+        result[0].pop("ctime")
+        assert result == [expected_duplicated_content]

     def test_content_find_bad_input(self, swh_storage):
         # 1. with bad input
@@ -3682,11 +3690,11 @@
         get_sha1s = sorted([c.sha1 for c in swh_contents if c.status != "absent"])
         start = get_sha1s[2]
         end = get_sha1s[-2]
-        actual_result = swh_storage.content_get_range(start, end)
+        result = swh_storage.content_get_range(start, end)

-        assert actual_result["next"] is None
+        assert result["next"] is None

-        actual_contents = actual_result["contents"]
+        actual_contents = result["contents"]
         expected_contents = [c for c in present_contents if start <= c["sha1"] <= end]
         if expected_contents:
             assert_contents_ok(expected_contents, actual_contents, ["sha1"])
@@ -3699,10 +3707,10 @@

         start = b"0" * 40
         end = b"f" * 40
-        actual_result = swh_storage.content_get_range(start, end)
-        assert actual_result["next"] is None
+        result = swh_storage.content_get_range(start, end)
+        assert result["next"] is None

-        actual_contents = actual_result["contents"]
+        actual_contents = result["contents"]
         expected_contents = [c for c in present_contents if start <= c["sha1"] <= end]
         if expected_contents:
             assert_contents_ok(expected_contents, actual_contents, ["sha1"])
@@ -3713,9 +3721,9 @@
         """content_get_range for an empty range returns nothing"""
         start = b"0" * 40
         end = b"f" * 40
-        actual_result = swh_storage.content_get_range(end, start)
-        assert actual_result["next"] is None
-        assert len(actual_result["contents"]) == 0
+        result = swh_storage.content_get_range(end, start)
+        assert result["next"] is None
+        assert len(result["contents"]) == 0

     def test_generate_content_get_range_limit_none(self, swh_storage):
         """content_get_range call with wrong limit input should fail"""
@@ -3732,10 +3740,10 @@
         end = get_sha1s[-1]

         # retrieve contents
-        actual_result = swh_storage.content_get_range(start, end)
+        result = swh_storage.content_get_range(start, end)

-        actual_contents = actual_result["contents"]
-        assert actual_result["next"] is None
+        actual_contents = result["contents"]
+        assert result["next"] is None
         assert len(actual_contents) == len(get_sha1s)

         expected_contents = [c.to_dict() for c in swh_contents if c.status != "absent"]
@@ -3752,10 +3760,10 @@

         # retrieve contents limited to n-1 results
         limited_results = len(get_sha1s) - 1
-        actual_result = swh_storage.content_get_range(start, end, limit=limited_results)
+        result = swh_storage.content_get_range(start, end, limit=limited_results)

-        actual_contents = actual_result["contents"]
-        assert actual_result["next"] == get_sha1s[-1]
+        actual_contents = result["contents"]
+        assert result["next"] == get_sha1s[-1]
         assert len(actual_contents) == limited_results

         expected_contents = [contents_map[sha1] for sha1 in get_sha1s[:-1]]
@@ -4040,9 +4048,9 @@
     def test_content_add_db(self, swh_storage, sample_data):
         content = sample_data.content

-        actual_result = swh_storage.content_add([content])
+        result = swh_storage.content_add([content])

-        assert actual_result == {
+        assert result == {
             "content:add": 1,
             "content:add:bytes": content.length,
         }
@@ -4077,9 +4085,9 @@
     def test_content_add_metadata_db(self, swh_storage, sample_data):
         content = attr.evolve(sample_data.content, data=None, ctime=now())

-        actual_result = swh_storage.content_add_metadata([content])
+        result = swh_storage.content_add_metadata([content])

-        assert actual_result == {
+        assert result == {
             "content:add": 1,
         }

@@ -4112,10 +4120,10 @@
         content, cont2 = sample_data.skipped_contents[:2]
         content2 = attr.evolve(cont2, blake2s256=None)

-        actual_result = swh_storage.skipped_content_add([content, content, content2])
+        result = swh_storage.skipped_content_add([content, content, content2])

-        assert 2 <= actual_result.pop("skipped_content:add") <= 3
-        assert actual_result == {}
+        assert 2 <= result.pop("skipped_content:add") <= 3
+        assert result == {}

         with db_transaction(swh_storage) as (_, cur):
             cur.execute(