diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -27,7 +27,7 @@ python3-yaml, python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.118~), + python3-swh.storage (>= 0.0.120~), python3-swh.indexer (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), @@ -39,7 +39,7 @@ Architecture: all Depends: python3-swh.core (>= 0.0.40~), python3-swh.model (>= 0.0.25~), - python3-swh.storage (>= 0.0.118~), + python3-swh.storage (>= 0.0.120~), python3-swh.indexer.storage (>= 0.0.120~), python3-swh.vault (>= 0.0.20~), python3-swh.scheduler (>= 0.0.31~), diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ swh.core >= 0.0.40 swh.model >= 0.0.25 -swh.storage >= 0.0.118 +swh.storage >= 0.0.120 swh.vault >= 0.0.20 swh.indexer >= 0.0.120 swh.scheduler >= 0.0.31 \ No newline at end of file diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -24,6 +24,59 @@ return origin +@api_route(r'/origins/', 'api-origins') +@api_doc('/origins/', noargs=True) +def api_origins(request): + """ + .. http:get:: /api/1/origins/ + + Get list of archived software origins. + + Origins are sorted by ids before returning them. 
+ + :query int origin_from: The minimum id of the origins to return + (defaults to 1) + :query int origin_count: The maximum number of origins to return + (defaults to 100, cannot exceed 10000) + + :>jsonarr number id: the origin unique identifier + :>jsonarr string origin_visits_url: link to get information about the + visits for that origin + :>jsonarr string type: the type of software origin (possible values are ``git``, ``svn``, + ``hg``, ``deb``, ``pypi``, ``ftp`` or ``deposit``) + :>jsonarr string url: the origin canonical url + + :reqheader Accept: the requested response content type, + either ``application/json`` (default) or ``application/yaml`` + :resheader Content-Type: this depends on :http:header:`Accept` header of request + :resheader Link: indicates that a subsequent or previous result page is available + and contains the url pointing to it + + **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` + + :statuscode 200: no error + + **Example:** + + .. 
parsed-literal:: + + :swh_web_api:`origins?origin_from=50000&origin_count=500` + """ # noqa + origin_from = int(request.query_params.get('origin_from', '1')) + origin_count = int(request.query_params.get('origin_count', '100')) + origin_count = min(origin_count, 10000) + results = api_lookup( + service.lookup_origins, origin_from, origin_count+1, + enrich_fn=_enrich_origin) + response = {'results': results, 'headers': {}} + if len(results) > origin_count: + origin_from = results.pop()['id'] + response['headers']['link-next'] = reverse( + 'api-origins', query_params={'origin_from': origin_from, + 'origin_count': origin_count}) + return response + + @api_route(r'/origin/(?P[0-9]+)/', 'api-origin') @api_route(r'/origin/(?P[a-z]+)/url/(?P.+)/', 'api-origin') diff --git a/swh/web/common/service.py b/swh/web/common/service.py --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -230,6 +230,22 @@ return converters.from_origin(origin_info) +def lookup_origins(origin_from=1, origin_count=100): + """Get list of archived software origins in a paginated way. 
+ + Origins are sorted by id before returning them + + Args: + origin_from (int): The minimum id of the origins to return + origin_count (int): The maximum number of origins to return + + Yields: + origins information as dicts + """ + origins = storage.origin_get_range(origin_from, origin_count) + return map(converters.from_origin, origins) + + def search_origin(url_pattern, offset=0, limit=50, regexp=False, with_visit=False): """Search for origins whose urls contain a provided string pattern diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -3,6 +3,8 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +import random + from hypothesis import given from rest_framework.test import APITestCase from unittest.mock import patch @@ -12,7 +14,7 @@ from swh.web.common.utils import reverse from swh.web.common.origin_visits import get_origin_visits from swh.web.tests.strategies import ( - origin, new_origin, visit_dates, new_snapshots + origin, new_origin, new_origins, visit_dates, new_snapshots ) from swh.web.tests.testcase import WebTestCase @@ -330,3 +332,42 @@ self.assertEqual(rv.status_code, 400, rv.content) mock_idx_storage.assert_not_called() + + @given(new_origins(20)) + def test_api_lookup_origins(self, new_origins): + + nb_origins = len(new_origins) + + expected_origins = self.storage.origin_add(new_origins) + + origin_from_idx = random.randint(1, nb_origins-1) - 1 + origin_from = expected_origins[origin_from_idx]['id'] + max_origin_id = expected_origins[-1]['id'] + origin_count = random.randint(1, max_origin_id - origin_from) + + url = reverse('api-origins', + query_params={'origin_from': origin_from, + 'origin_count': origin_count}) + + rv = self.client.get(url) + + self.assertEqual(rv.status_code, 200) + + start = origin_from_idx + end = origin_from_idx 
+ origin_count + expected_origins = expected_origins[start:end] + + for expected_origin in expected_origins: + expected_origin['origin_visits_url'] = reverse( + 'api-origin-visits', + url_args={'origin_id': expected_origin['id']}) + + self.assertEqual(rv.data, expected_origins) + + next_origin_id = expected_origins[-1]['id']+1 + if self.storage.origin_get({'id': next_origin_id}): + self.assertIn('Link', rv) + next_url = reverse('api-origins', + query_params={'origin_from': next_origin_id, + 'origin_count': origin_count}) + self.assertIn(next_url, rv['Link']) diff --git a/swh/web/tests/common/test_service.py b/swh/web/tests/common/test_service.py --- a/swh/web/tests/common/test_service.py +++ b/swh/web/tests/common/test_service.py @@ -20,7 +20,7 @@ release, revision, unknown_revision, revisions, unknown_revisions, ancestor_revisions, non_ancestor_revisions, invalid_sha1, sha256, revision_with_submodules, unknown_directory, empty_directory, - new_revision + new_revision, new_origins ) from swh.web.tests.testcase import ( WebTestCase, ctags_json_missing, fossology_missing @@ -788,3 +788,21 @@ service.lookup_directory_with_revision( revision, dir_entry['name'], with_data=True)) ) + + @given(new_origins(20)) + def test_lookup_origins(self, new_origins): + + nb_origins = len(new_origins) + expected_origins = self.storage.origin_add(new_origins) + + origin_from_idx = random.randint(1, nb_origins-1) - 1 + origin_from = expected_origins[origin_from_idx]['id'] + max_origin_idx = expected_origins[-1]['id'] + origin_count = random.randint(1, max_origin_idx - origin_from) + + actual_origins = list(service.lookup_origins(origin_from, + origin_count)) + expected_origins = list(self.storage.origin_get_range(origin_from, + origin_count)) + + self.assertEqual(actual_origins, expected_origins) diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py --- a/swh/web/tests/strategies.py +++ b/swh/web/tests/strategies.py @@ -164,6 +164,18 @@ lambda origin: 
storage.origin_get(origin) is None) +def new_origins(nb_origins=None): + """ + Hypothesis strategy returning random origins not ingested + into the test archive. + """ + min_size = nb_origins if nb_origins is not None else 2 + max_size = nb_origins if nb_origins is not None else 8 + size = random.randint(min_size, max_size) + return lists(new_origin(), min_size=size, max_size=size, + unique_by=lambda o: tuple(sorted(o.items()))) + + def visit_dates(nb_dates=None): """ Hypothesis strategy returning a list of visit dates.