diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py index 47f3d291..d66ee47d 100644 --- a/swh/deposit/api/collection.py +++ b/swh/deposit/api/collection.py @@ -1,135 +1,175 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Optional, Tuple +from django.shortcuts import render from rest_framework import status +from rest_framework.generics import ListAPIView from ..config import DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_IRI from ..models import Deposit from ..parsers import ( SWHAtomEntryParser, SWHFileUploadTarParser, SWHFileUploadZipParser, SWHMultiPartParser, ) from .common import ( ACCEPT_ARCHIVE_CONTENT_TYPES, APIPost, ParsedRequestHeaders, Receipt, get_collection_by_name, ) +from .utils import DefaultPagination, DepositSerializer -class CollectionAPI(APIPost): +class CollectionAPI(ListAPIView, APIPost): """Deposit request class defining api endpoints for sword deposit. What's known as 'Col-IRI' in the sword specification. - HTTP verbs supported: POST + HTTP verbs supported: GET and POST """ parser_classes = ( SWHMultiPartParser, SWHFileUploadZipParser, SWHFileUploadTarParser, SWHAtomEntryParser, ) + serializer_class = DepositSerializer + pagination_class = DefaultPagination + + def get(self, request, *args, **kwargs): + """List the user's collection if the user has access to said collection. 
+ + """ + self.checks(request, kwargs["collection_name"]) + paginated_result = super().get(request, *args, **kwargs) + data = paginated_result.data + # Build pagination link headers + links = [] + for link_name in ["next", "previous"]: + link = data.get(link_name) + if link is None: + continue + links.append(f'<{link}>; rel="{link_name}"') + response = render( + request, + "deposit/collection_list.xml", + context={ + "count": data["count"], + "results": [dict(d) for d in data["results"]], + }, + content_type="application/xml", + status=status.HTTP_200_OK, + ) + response._headers["Link"] = ",".join(links) + return response + + def get_queryset(self): + """List the deposits for the authenticated user (pagination is handled by the + `pagination_class` class attribute). + + """ + return Deposit.objects.filter(client=self.request.user.id).order_by("id") + def process_post( self, req, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit] = None, ) -> Tuple[int, str, Receipt]: """Create a first deposit as: - archive deposit (1 zip) - multipart (1 zip + 1 atom entry) - atom entry Args: req (Request): the request holding the information to parse and inject in db collection_name (str): the associated client Returns: An http response (HttpResponse) according to the situation. If everything is ok, a 201 response (created) with a deposit receipt. Raises: - archive deposit: - 400 (bad request) if the request is not providing an external identifier - 403 (forbidden) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or hash provided mismatch the reality of the archive. 
- 415 (unsupported media type) if a wrong media type is provided - multipart deposit: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided - Atom entry deposit: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ assert deposit is None deposit = self._deposit_create(req, collection_name, external_id=headers.slug) if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES: receipt = self._binary_upload(req, headers, collection_name, deposit) elif req.content_type.startswith("multipart/"): receipt = self._multipart_upload(req, headers, collection_name, deposit) else: receipt = self._atom_entry(req, headers, collection_name, deposit) return status.HTTP_201_CREATED, EDIT_IRI, receipt def _deposit_create( self, request, collection_name: str, external_id: Optional[str] ) -> Deposit: collection = get_collection_by_name(collection_name) client = self.get_client(request) deposit_parent: Optional[Deposit] = None if external_id: # TODO: delete this when clients stopped relying on the slug try: # find a deposit parent (same external id, status load to success) deposit_parent = ( Deposit.objects.filter( client=client, external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS, ) .order_by("-id")[0:1] .get() ) except Deposit.DoesNotExist: # then no parent for that deposit, deposit_parent already None pass return Deposit( collection=collection, external_id=external_id or "", client=client, parent=deposit_parent, ) diff --git a/swh/deposit/api/collection_list.py b/swh/deposit/api/collection_list.py deleted file mode 100644 index a99865e4..00000000 --- a/swh/deposit/api/collection_list.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (C) 2021 The Software Heritage 
developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from django.shortcuts import render -from rest_framework import status -from rest_framework.generics import ListAPIView - -from swh.deposit.api.common import APIBase -from swh.deposit.api.utils import DefaultPagination, DepositSerializer -from swh.deposit.models import Deposit - - -class CollectionListAPI(ListAPIView, APIBase): - """Deposit request class to list the user deposits. - - HTTP verbs supported: ``GET`` - - """ - - serializer_class = DepositSerializer - pagination_class = DefaultPagination - - def get(self, request, *args, **kwargs): - """List the user's collection if the user has access to said collection. - - """ - self.checks(request, kwargs["collection_name"]) - paginated_result = super().get(request, *args, **kwargs) - data = paginated_result.data - # Build pagination link headers - links = [] - for link_name in ["next", "previous"]: - link = data.get(link_name) - if link is None: - continue - links.append(f'<{link}>; rel="{link_name}"') - response = render( - request, - "deposit/collection_list.xml", - context={ - "count": data["count"], - "results": [dict(d) for d in data["results"]], - }, - content_type="application/xml", - status=status.HTTP_200_OK, - ) - response._headers["Link"] = ",".join(links) - return response - - def get_queryset(self): - """List the deposits for the authenticated user (pagination is handled by the - `pagination_class` class attribute). 
- - """ - return Deposit.objects.filter(client=self.request.user.id).order_by("id") diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py index e20181d1..a8b7146b 100644 --- a/swh/deposit/api/urls.py +++ b/swh/deposit/api/urls.py @@ -1,91 +1,76 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SWH's deposit api URL Configuration """ from django.conf.urls import url from django.shortcuts import render -from ..config import ( - COL_IRI, - COLLECTION_LIST, - CONT_FILE_IRI, - EDIT_IRI, - EM_IRI, - SD_IRI, - SE_IRI, - STATE_IRI, -) +from ..config import COL_IRI, CONT_FILE_IRI, EDIT_IRI, EM_IRI, SD_IRI, SE_IRI, STATE_IRI from .collection import CollectionAPI -from .collection_list import CollectionListAPI from .content import ContentAPI from .edit import EditAPI from .edit_media import EditMediaAPI from .service_document import ServiceDocumentAPI from .state import StateAPI from .sword_edit import SwordEditAPI def api_view(req): return render(req, "api.html") # PUBLIC API urlpatterns = [ # simple view on the api url(r"^$", api_view, name="api"), # SD IRI - Service Document IRI # -> GET url(r"^servicedocument/", ServiceDocumentAPI.as_view(), name=SD_IRI), # Col-IRI - Collection IRI # -> POST url(r"^(?P<collection_name>[^/]+)/$", CollectionAPI.as_view(), name=COL_IRI), # EM IRI - Atom Edit Media IRI (update archive IRI) # -> PUT (update-in-place existing archive) # -> POST (add new archive) url( r"^(?P<collection_name>[^/]+)/(?P<deposit_id>[^/]+)/media/$", EditMediaAPI.as_view(), name=EM_IRI, ), # Edit IRI - Atom Entry Edit IRI (update metadata IRI) # -> PUT (update in place) # -> DELETE (delete container) url( r"^(?P<collection_name>[^/]+)/(?P<deposit_id>[^/]+)/atom/$", EditAPI.as_view(), name=EDIT_IRI, ), # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> POST (add new metadata) url( r"^(?P<collection_name>[^/]+)/(?P<deposit_id>[^/]+)/metadata/$", 
SwordEditAPI.as_view(), name=SE_IRI, ), # State IRI # -> GET url( r"^(?P<collection_name>[^/]+)/(?P<deposit_id>[^/]+)/status/$", StateAPI.as_view(), name=STATE_IRI, ), # Cont-IRI # -> GET url( r"^(?P<collection_name>[^/]+)/(?P<deposit_id>[^/]+)/content/$", ContentAPI.as_view(), name=CONT_FILE_IRI, ), # specification is not clear about # File-IRI, we assume it's the same as # the Cont-IRI one - url( - r"^(?P<collection_name>[^/]+)/list/$", - CollectionListAPI.as_view(), - name=COLLECTION_LIST, - ), ] diff --git a/swh/deposit/config.py b/swh/deposit/config.py index 8ec4288c..30424bf6 100644 --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -1,119 +1,118 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict from swh.core import config from swh.deposit import __version__ from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher from swh.scheduler import get_scheduler from swh.scheduler.interface import SchedulerInterface from swh.storage import get_storage from swh.storage.interface import StorageInterface # IRIs (Internationalized Resource identifier) sword 2.0 specified EDIT_IRI = "edit_iri" SE_IRI = "se_iri" EM_IRI = "em_iri" CONT_FILE_IRI = "cont_file_iri" SD_IRI = "servicedocument" COL_IRI = "upload" STATE_IRI = "state_iri" -COLLECTION_LIST = "collection-list" PRIVATE_GET_RAW_CONTENT = "private-download" PRIVATE_CHECK_DEPOSIT = "check-deposit" PRIVATE_PUT_DEPOSIT = "private-update" PRIVATE_GET_DEPOSIT_METADATA = "private-read" PRIVATE_LIST_DEPOSITS = "private-deposit-list" ARCHIVE_KEY = "archive" METADATA_KEY = "metadata" RAW_METADATA_KEY = "raw-metadata" ARCHIVE_TYPE = "archive" METADATA_TYPE = "metadata" AUTHORIZED_PLATFORMS = ["development", "production", "testing"] DEPOSIT_STATUS_REJECTED = "rejected" DEPOSIT_STATUS_PARTIAL = "partial" DEPOSIT_STATUS_DEPOSITED = 
"deposited" DEPOSIT_STATUS_VERIFIED = "verified" DEPOSIT_STATUS_LOAD_SUCCESS = "done" DEPOSIT_STATUS_LOAD_FAILURE = "failed" # Revision author for deposit SWH_PERSON = { "name": "Software Heritage", "fullname": "Software Heritage", "email": "robot@softwareheritage.org", } DEFAULT_CONFIG = { "max_upload_size": 209715200, "checks": True, } def setup_django_for(platform=None, config_file=None): """Setup function for command line tools (swh.deposit.create_user) to initialize the needed db access. Note: Do not import any django related module prior to this function call. Otherwise, this will raise an django.core.exceptions.ImproperlyConfigured error message. Args: platform (str): the platform the scheduling is running config_file (str): Extra configuration file (typically for the production platform) Raises: ValueError in case of wrong platform inputs. """ if platform is not None: if platform not in AUTHORIZED_PLATFORMS: raise ValueError("Platform should be one of %s" % AUTHORIZED_PLATFORMS) if "DJANGO_SETTINGS_MODULE" not in os.environ: os.environ["DJANGO_SETTINGS_MODULE"] = "swh.deposit.settings.%s" % platform if config_file: os.environ.setdefault("SWH_CONFIG_FILENAME", config_file) import django django.setup() class APIConfig: """API Configuration centralized class. This loads explicitly the configuration file out of the SWH_CONFIG_FILENAME environment variable. 
""" def __init__(self): self.config: Dict[str, Any] = config.load_from_envvar(DEFAULT_CONFIG) self.scheduler: SchedulerInterface = get_scheduler(**self.config["scheduler"]) self.tool = { "name": "swh-deposit", "version": __version__, "configuration": {"sword_version": "2"}, } self.storage: StorageInterface = get_storage(**self.config["storage"]) self.storage_metadata: StorageInterface = get_storage( **self.config["storage_metadata"] ) def swh_deposit_authority(self): return MetadataAuthority( type=MetadataAuthorityType.REGISTRY, url=self.config["swh_authority_url"], ) def swh_deposit_fetcher(self): return MetadataFetcher(name=self.tool["name"], version=self.tool["version"],) diff --git a/swh/deposit/tests/api/test_collection_list.py b/swh/deposit/tests/api/test_collection_list.py index 01aac4ea..9a54a461 100644 --- a/swh/deposit/tests/api/test_collection_list.py +++ b/swh/deposit/tests/api/test_collection_list.py @@ -1,117 +1,113 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import BytesIO from django.urls import reverse_lazy as reverse from requests.utils import parse_header_links from rest_framework import status -from swh.deposit.config import ( - COLLECTION_LIST, - DEPOSIT_STATUS_DEPOSITED, - DEPOSIT_STATUS_PARTIAL, -) +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL from swh.deposit.models import DepositCollection from swh.deposit.parsers import parse_xml def test_deposit_collection_list_is_auth_protected(anonymous_client): """Deposit list should require authentication """ - url = reverse(COLLECTION_LIST, args=("test",)) + url = reverse(COL_IRI, args=("test",)) response = anonymous_client.get(url) assert response.status_code == status.HTTP_401_UNAUTHORIZED assert b"protected by basic authentication" in 
response.content def test_deposit_collection_list_collection_access_restricted_to_user_coll( deposit_another_collection, deposit_user, authenticated_client ): """Deposit list api should restrict access to user's collection """ collection_id = authenticated_client.deposit_client.collections[0] coll = DepositCollection.objects.get(pk=collection_id) # authenticated_client has access to the "coll" collection coll2 = deposit_another_collection assert coll.name != coll2.name # but does not have access to that coll2 collection - url = reverse(COLLECTION_LIST, args=(coll2.name,)) + url = reverse(COL_IRI, args=(coll2.name,)) response = authenticated_client.get(url) # so it gets rejected access to the listing of that coll2 collection assert response.status_code == status.HTTP_403_FORBIDDEN msg = f"{deposit_user.username} cannot access collection {coll2.name}" assert msg in response.content.decode("utf-8") def test_deposit_collection_list_nominal( partial_deposit, deposited_deposit, authenticated_client ): """Deposit list api should return the user deposits in a paginated way """ client_id = authenticated_client.deposit_client.id assert partial_deposit.client.id == client_id assert deposited_deposit.client.id == client_id # Both deposit were deposited by the authenticated client # so requesting the listing of the deposits, both should be listed deposit_id = str(partial_deposit.id) deposit_id2 = str(deposited_deposit.id) coll = partial_deposit.collection # requesting the listing of the deposit for the user's collection - url = reverse(COLLECTION_LIST, args=(coll.name,)) + url = reverse(COL_IRI, args=(coll.name,)) response = authenticated_client.get(f"{url}?page_size=1") assert response.status_code == status.HTTP_200_OK data = parse_xml(BytesIO(response.content)) assert ( data["swh:count"] == "2" ) # total result of 2 deposits if consuming all results header_link = parse_header_links(response._headers["Link"]) assert len(header_link) == 1 # only 1 next link expected_next = 
f"{url}?page=2&page_size=1" assert header_link[0]["url"].endswith(expected_next) assert header_link[0]["rel"] == "next" # only one deposit in the response deposit = data["swh:deposits"]["swh:deposit"] # dict as only 1 value (a-la js) assert isinstance(deposit, dict) assert deposit["swh:id"] == deposit_id assert deposit["swh:status"] == DEPOSIT_STATUS_PARTIAL # then 2nd page response2 = authenticated_client.get(expected_next) assert response2.status_code == status.HTTP_200_OK data2 = parse_xml(BytesIO(response2.content)) assert data2["swh:count"] == "2" # still total of 2 deposits across all results expected_previous = f"{url}?page_size=1" header_link2 = parse_header_links(response2._headers["Link"]) assert len(header_link2) == 1 # only 1 previous link assert header_link2[0]["url"].endswith(expected_previous) assert header_link2[0]["rel"] == "previous" # only 1 deposit in the response deposit2 = data2["swh:deposits"]["swh:deposit"] # dict as only 1 value (a-la js) assert isinstance(deposit2, dict) assert deposit2["swh:id"] == deposit_id2 assert deposit2["swh:status"] == DEPOSIT_STATUS_DEPOSITED # Retrieve every deposit in one query (no page_size parameter) response3 = authenticated_client.get(url) assert response3.status_code == status.HTTP_200_OK data3 = parse_xml(BytesIO(response3.content)) assert data3["swh:count"] == "2" # total result of 2 deposits across all results deposits3 = data3["swh:deposits"]["swh:deposit"] # list here assert isinstance(deposits3, list) assert len(deposits3) == 2 header_link3 = parse_header_links(response3._headers["Link"]) assert header_link3 == [] # no pagination as all results received in one round assert deposit in deposits3 assert deposit2 in deposits3