diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index e10ac098..926c4296 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,1269 +1,1276 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from abc import ABCMeta, abstractmethod import datetime import hashlib import json from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union import uuid import attr from django.core.files.uploadedfile import UploadedFile from django.http import FileResponse, HttpResponse from django.shortcuts import render from django.template.loader import render_to_string from django.urls import reverse from django.utils import timezone from rest_framework import status -from rest_framework.authentication import BaseAuthentication +from rest_framework.authentication import BaseAuthentication, BasicAuthentication from rest_framework.permissions import BasePermission, IsAuthenticated from rest_framework.request import Request from rest_framework.views import APIView from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication from swh.deposit.models import Deposit from swh.deposit.utils import compute_metadata_context from swh.model import hashutil from swh.model.identifiers import ( ExtendedObjectType, ExtendedSWHID, QualifiedSWHID, ValidationError, ) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, Origin, RawExtrinsicMetadata, ) from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( ARCHIVE_KEY, ARCHIVE_TYPE, CONT_FILE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, EDIT_IRI, EM_IRI, METADATA_KEY, METADATA_TYPE, RAW_METADATA_KEY, SE_IRI, STATE_IRI, APIConfig, ) from ..errors import ( BAD_REQUEST, CHECKSUM_MISMATCH, ERROR_CONTENT, FORBIDDEN, MAX_UPLOAD_SIZE_EXCEEDED, MEDIATION_NOT_ALLOWED, METHOD_NOT_ALLOWED, NOT_FOUND, PARSING_ERROR, DepositError, ParserError, ) from ..models import DepositClient, DepositCollection, DepositRequest from ..parsers import parse_xml from ..utils import extended_swhid_from_qualified, parse_swh_reference ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] @attr.s class ParsedRequestHeaders: content_type = attr.ib(type=str) content_length = attr.ib(type=Optional[int]) in_progress = attr.ib(type=bool) content_disposition = attr.ib(type=Optional[str]) content_md5sum = attr.ib(type=Optional[bytes]) packaging = attr.ib(type=Optional[str]) slug = attr.ib(type=Optional[str]) on_behalf_of = attr.ib(type=Optional[str]) metadata_relevant = attr.ib(type=Optional[str]) swhid = attr.ib(type=Optional[str]) @attr.s class Receipt: """Data computed while handling the request body that will be served in the Deposit Receipt.""" deposit_id = attr.ib(type=int) deposit_date = attr.ib(type=datetime.datetime) status = attr.ib(type=str) archive = attr.ib(type=Optional[str]) def _compute_md5(filehandler: UploadedFile) -> bytes: h = hashlib.md5() for chunk in filehandler: h.update(chunk) # type: ignore return h.digest() def get_deposit_by_id( deposit_id: int, collection_name: Optional[str] = None ) -> Deposit: """Gets an existing Deposit object if it exists, or raises `DepositError`. If `collection` is not None, also checks the deposit belongs to the collection.""" try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: raise DepositError(NOT_FOUND, f"Deposit {deposit_id} does not exist") if collection_name and deposit.collection.name != collection_name: get_collection_by_name(collection_name) # raises if does not exist raise DepositError( NOT_FOUND, f"Deposit {deposit_id} does not belong to collection {collection_name}", ) return deposit def get_collection_by_name(collection_name: str): """Gets an existing Deposit object if it exists, or raises `DepositError`.""" try: collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: raise DepositError(NOT_FOUND, f"Unknown collection name {collection_name}") assert collection is not None return collection def guess_deposit_origin_url(deposit: Deposit): """Guesses an origin url for the given deposit.""" external_id = deposit.external_id if not external_id: # The client provided neither an origin_url nor a slug. That's inconvenient, # but SWORD requires we support it. So let's generate a random slug. external_id = str(uuid.uuid4()) return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id) def check_client_origin(client: DepositClient, origin_url: str): provider_url = client.provider_url.rstrip("/") + "/" if not origin_url.startswith(provider_url): raise DepositError( FORBIDDEN, - f"Cannot create origin {origin_url}, it must start with " f"{provider_url}", + f"Cannot create origin {origin_url}, it must start with {provider_url}", ) -class AuthenticatedAPIView(APIView): - """Mixin intended as a based API view to enforce the basic - authentication check - - """ - - authentication_classes: Sequence[Type[BaseAuthentication]] = ( - KeycloakBasicAuthentication, - ) - permission_classes: Sequence[Type[BasePermission]] = ( - IsAuthenticated, - HasDepositPermission, - ) - - -class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): +class APIBase(APIConfig, APIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ _client: Optional[DepositClient] = None + def __init__(self): + super().__init__() + auth_provider = self.config.get("authentication_provider") + if auth_provider == "basic": + self.authentication_classes: Sequence[Type[BaseAuthentication]] = ( + BasicAuthentication, + ) + self.permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,) + elif auth_provider == "keycloak": + self.authentication_classes: Sequence[Type[BaseAuthentication]] = ( + KeycloakBasicAuthentication, + ) + self.permission_classes: Sequence[Type[BasePermission]] = ( + IsAuthenticated, + HasDepositPermission, + ) + else: + raise ValueError( + "Configuration key 'authentication_provider' should be provided with" + f"either 'basic' or 'keycloak' value not {auth_provider!r}." + ) + def _read_headers(self, request: Request) -> ParsedRequestHeaders: """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: request: Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = request._request.META content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get("HTTP_IN_PROGRESS", False) if isinstance(in_progress, str): in_progress = in_progress.lower() == "true" content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) return ParsedRequestHeaders( content_type=request.content_type, content_length=content_length, in_progress=in_progress, content_disposition=meta.get("HTTP_CONTENT_DISPOSITION"), content_md5sum=content_md5sum, packaging=meta.get("HTTP_PACKAGING"), slug=meta.get("HTTP_SLUG"), on_behalf_of=meta.get("HTTP_ON_BEHALF_OF"), metadata_relevant=meta.get("HTTP_METADATA_RELEVANT"), swhid=meta.get("HTTP_X_CHECK_SWHID"), ) def _deposit_put(self, deposit: Deposit, in_progress: bool = False) -> None: """Save/Update a deposit in db. Args: deposit: deposit being updated/created in_progress: deposit status """ if in_progress is False: self._complete_deposit(deposit) else: deposit.status = DEPOSIT_STATUS_PARTIAL deposit.save() def _complete_deposit(self, deposit: Deposit) -> None: """Marks the deposit as 'deposited', then schedule a check task if configured to do so.""" deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() if not deposit.origin_url: deposit.origin_url = guess_deposit_origin_url(deposit) if self.config["checks"]: scheduler = self.scheduler if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: task = create_oneshot_task_dict( "check-deposit", collection=deposit.collection.name, deposit_id=deposit.id, retries_left=3, ) check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() def _deposit_request_put( self, deposit: Deposit, deposit_request_data: Dict[str, Any], replace_metadata: bool = False, replace_archives: bool = False, ) -> DepositRequest: """Save a deposit request with metadata attached to a deposit. Args: deposit: The deposit concerned by the request deposit_request_data: The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata: Flag defining if we add or update existing metadata to the deposit replace_archives: Flag defining if we add or update archives to existing deposit Returns: the DepositRequest object stored in the backend """ if replace_metadata: DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data[RAW_METADATA_KEY] deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata.decode("utf-8"), ) deposit_request.save() assert deposit_request is not None return deposit_request def _delete_archives(self, collection_name: str, deposit: Deposit) -> Dict: """Delete archive references from the deposit id. """ DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name: str, deposit: Deposit) -> Dict: """Delete deposit reference. Args: collection_name: Client's collection deposit: The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ if deposit.collection.name != collection_name: summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( deposit.id, collection_name, ) raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_file_length( self, filehandler: UploadedFile, content_length: Optional[int] = None, ) -> None: """Check the filehandler passed as argument has exactly the expected content_length Args: filehandler: The file to check content_length: the expected length if provided. Raises: DepositError if the actual length does not match """ max_upload_size = self.config["max_upload_size"] if content_length: length = filehandler.size if length != content_length: raise DepositError(status.HTTP_412_PRECONDITION_FAILED, "Wrong length") if filehandler.size > max_upload_size: raise DepositError( MAX_UPLOAD_SIZE_EXCEEDED, f"Upload size limit exceeded (max {max_upload_size} bytes)." "Please consider sending the archive in multiple steps.", ) def _check_file_md5sum( self, filehandler: UploadedFile, md5sum: Optional[bytes], ) -> None: """Check the filehandler passed as argument has the expected md5sum Args: filehandler: The file to check md5sum: md5 hash expected from the file's content Raises: DepositError if the md5sum does not match """ if md5sum: _md5sum = _compute_md5(filehandler) if _md5sum != md5sum: raise DepositError( CHECKSUM_MISMATCH, "Wrong md5 hash", f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual " f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.", ) def _binary_upload( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Binary upload routine. Other than such a request, a 415 response is returned. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers.content_length if not content_length: raise DepositError( BAD_REQUEST, "CONTENT_LENGTH header is mandatory", "For archive deposit, the CONTENT_LENGTH header must be sent.", ) content_disposition = headers.content_disposition if not content_disposition: raise DepositError( BAD_REQUEST, "CONTENT_DISPOSITION header is mandatory", "For archive deposit, the CONTENT_DISPOSITION header must be sent.", ) packaging = headers.packaging if packaging and packaging not in ACCEPT_PACKAGINGS: raise DepositError( BAD_REQUEST, f"Only packaging {ACCEPT_PACKAGINGS} is supported", f"The packaging provided {packaging} is not supported", ) filehandler = request.FILES["file"] assert isinstance(filehandler, UploadedFile), filehandler self._check_file_length(filehandler, content_length) self._check_file_md5sum(filehandler, headers.content_md5sum) # actual storage of data archive_metadata = filehandler self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives, ) return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, status=deposit.status, archive=filehandler.name, ) def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]: """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ content_types_present = set() data: Dict[str, Optional[Any]] = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } for key, value in request.FILES.items(): fh = value content_type = fh.content_type if content_type in content_types_present: raise DepositError( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " "and 1 atom+xml entry is supported (as per sword2.0 " "specification)", "You provided more than 1 application/(zip|x-tar) " "or more than 1 application/atom+xml content-disposition " "header in the multipart deposit", ) content_types_present.add(content_type) assert content_type is not None data[content_type] = fh if len(content_types_present) != 2: raise DepositError( ERROR_CONTENT, "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for multipart " "deposit", "You need to provide only 1 application/(zip|x-tar) " "and 1 application/atom+xml content-disposition header " "in the multipart deposit", ) filehandler = data["application/zip"] if not filehandler: filehandler = data["application/x-tar"] assert isinstance(filehandler, UploadedFile), filehandler self._check_file_length(filehandler) self._check_file_md5sum(filehandler, headers.content_md5sum) try: raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: raise DepositError( PARSING_ERROR, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) self._set_deposit_origin_from_metadata(deposit, metadata, headers) # actual storage of data self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives ) assert filehandler is not None return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, archive=filehandler.name, status=deposit.status, ) def _store_metadata_deposit( self, deposit: Deposit, swhid_reference: Union[str, QualifiedSWHID], metadata: Dict, raw_metadata: bytes, deposit_origin: Optional[str] = None, ) -> Tuple[ExtendedSWHID, Deposit, DepositRequest]: """When all user inputs pass the checks, this associates the raw_metadata to the swhid_reference in the raw extrinsic metadata storage. In case of any issues, a bad request response is returned to the user with the details. Checks: - metadata are technically parsable - metadata pass the functional checks - SWHID (if any) is technically valid Args: deposit: Deposit reference swhid_reference: The swhid or the origin to attach metadata information to metadata: Full dict of metadata to check for validity (parsed out of raw_metadata) raw_metadata: The actual raw metadata to send in the storage metadata deposit_origin: Optional deposit origin url to use if any (e.g. deposit update scenario provides one) Raises: DepositError in case of incorrect inputs from the deposit client (e.g. functionally invalid metadata, ...) Returns: Tuple of target swhid, deposit, and deposit request """ metadata_ok, error_details = check_metadata(metadata) if not metadata_ok: assert error_details, "Details should be set when a failure occurs" raise DepositError( BAD_REQUEST, "Functional metadata checks failure", convert_status_detail(error_details), ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit.client.provider_url, metadata={"name": deposit.client.last_name}, ) metadata_fetcher = self.swh_deposit_fetcher() # replace metadata within the deposit backend deposit_request_data = { METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } # actually add the metadata to the completed deposit deposit_request = self._deposit_request_put(deposit, deposit_request_data) target_swhid: ExtendedSWHID # origin URL or CoreSWHID if isinstance(swhid_reference, str): target_swhid = Origin(swhid_reference).swhid() metadata_context = {} else: metadata_context = compute_metadata_context(swhid_reference) if deposit_origin: # metadata deposit update on completed deposit metadata_context["origin"] = deposit_origin target_swhid = extended_swhid_from_qualified(swhid_reference) self._check_swhid_in_archive(target_swhid) # metadata deposited by the client metadata_object = RawExtrinsicMetadata( target=target_swhid, # core swhid or origin discovery_date=deposit_request.date, authority=metadata_authority, fetcher=metadata_fetcher, format="sword-v2-atom-codemeta", metadata=raw_metadata, **metadata_context, ) # metadata on the metadata object swh_deposit_authority = self.swh_deposit_authority() swh_deposit_fetcher = self.swh_deposit_fetcher() metametadata_object = RawExtrinsicMetadata( target=metadata_object.swhid(), discovery_date=deposit_request.date, authority=swh_deposit_authority, fetcher=swh_deposit_fetcher, format="xml-deposit-info", metadata=render_to_string( "deposit/deposit_info.xml", context={"deposit": deposit} ).encode(), ) # write to metadata storage self.storage_metadata.metadata_authority_add( [metadata_authority, swh_deposit_authority] ) self.storage_metadata.metadata_fetcher_add( [metadata_fetcher, swh_deposit_fetcher] ) self.storage_metadata.raw_extrinsic_metadata_add( [metadata_object, metametadata_object] ) return (target_swhid, deposit, deposit_request) def _check_swhid_in_archive(self, target_swhid: ExtendedSWHID) -> None: """Check the target object already exists in the archive, and raises a BAD_REQUEST if it does not.""" if target_swhid.object_type in (ExtendedObjectType.CONTENT,): if list( self.storage.content_missing_per_sha1_git([target_swhid.object_id]) ): raise DepositError( BAD_REQUEST, f"Cannot load metadata on {target_swhid}, this content " f"object does not exist in the archive (yet?).", ) elif target_swhid.object_type in ( ExtendedObjectType.DIRECTORY, ExtendedObjectType.REVISION, ExtendedObjectType.RELEASE, ExtendedObjectType.SNAPSHOT, ): target_type_name = target_swhid.object_type.name.lower() method = getattr(self.storage, target_type_name + "_missing") if list(method([target_swhid.object_id])): raise DepositError( BAD_REQUEST, f"Cannot load metadata on {target_swhid}, this {target_type_name} " f"object does not exist in the archive (yet?).", ) elif target_swhid.object_type in (ExtendedObjectType.ORIGIN,): if None in list(self.storage.origin_get_by_sha1([target_swhid.object_id])): raise DepositError( BAD_REQUEST, "Cannot load metadata on origin, it is not (yet?) known to the " "archive.", ) else: # This should not happen, because target_swhid is generated from either # a core swhid or an origin URL. # Let's just check it again so the "switch" is exhaustive. raise ValueError( f"_check_swhid_in_archive expected core SWHID or origin SWHID, " f"but got {target_swhid}." ) def _atom_entry( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Atom entry deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: raise DepositError( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if metadata is None: raise DepositError( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) self._set_deposit_origin_from_metadata(deposit, metadata, headers) # Determine if we are in the metadata-only deposit case try: swhid_ref = parse_swh_reference(metadata) except ValidationError as e: raise DepositError( PARSING_ERROR, "Invalid SWHID reference", str(e), ) if swhid_ref is not None and ( deposit.origin_url or deposit.parent or deposit.external_id ): raise DepositError( BAD_REQUEST, " is for metadata-only deposits and " " / / Slug are for " "code deposits, only one may be used on a given deposit.", ) if swhid_ref is not None: deposit.save() # We need a deposit id target_swhid, depo, depo_request = self._store_metadata_deposit( deposit, swhid_ref, metadata, raw_metadata ) deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS if isinstance(swhid_ref, QualifiedSWHID): deposit.swhid = str(extended_swhid_from_qualified(swhid_ref)) deposit.swhid_context = str(swhid_ref) deposit.complete_date = depo_request.date deposit.reception_date = depo_request.date deposit.save() return Receipt( deposit_id=deposit.id, deposit_date=depo_request.date, status=deposit.status, archive=None, ) self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives, ) return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, status=deposit.status, archive=None, ) def _set_deposit_origin_from_metadata(self, deposit, metadata, headers): create_origin = metadata.get("swh:deposit", {}).get("swh:create_origin") add_to_origin = metadata.get("swh:deposit", {}).get("swh:add_to_origin") if create_origin and add_to_origin: raise DepositError( BAD_REQUEST, " and are mutually exclusive, " "as they respectively create a new origin and add to an existing " "origin.", ) if create_origin: origin_url = create_origin["swh:origin"]["@url"] check_client_origin(deposit.client, origin_url) deposit.origin_url = origin_url if add_to_origin: origin_url = add_to_origin["swh:origin"]["@url"] check_client_origin(deposit.client, origin_url) deposit.parent = ( Deposit.objects.filter( client=deposit.client, origin_url=origin_url, status=DEPOSIT_STATUS_LOAD_SUCCESS, ) .order_by("-id")[0:1] .get() ) deposit.origin_url = origin_url if "atom:external_identifier" in metadata: # Deprecated tag. # When clients stopped using it, this should raise an error # unconditionally if deposit.origin_url: raise DepositError( BAD_REQUEST, " is deprecated, you should only use " " and from now on.", ) if headers.slug and metadata["atom:external_identifier"] != headers.slug: raise DepositError( BAD_REQUEST, "The tag and Slug header are deprecated, " " or " "should be used instead.", ) def _empty_post( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, ) -> Receipt: """Empty post to finalize a deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be finalized """ self._complete_deposit(deposit) assert deposit.complete_date is not None return Receipt( deposit_id=deposit.id, deposit_date=deposit.complete_date, status=deposit.status, archive=None, ) def additional_checks( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit], ) -> Dict[str, Any]: """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def get_client(self, request) -> DepositClient: # This class depends on AuthenticatedAPIView, so request.user.username # is always set username = request.user.username assert username is not None if self._client is None: try: self._client = DepositClient.objects.get( # type: ignore username=username ) except DepositClient.DoesNotExist: raise DepositError(NOT_FOUND, f"Unknown client name {username}") assert self._client.username == username return self._client def checks( self, request: Request, collection_name: str, deposit: Optional[Deposit] = None ) -> ParsedRequestHeaders: if deposit is None: collection = get_collection_by_name(collection_name) else: assert collection_name == deposit.collection.name collection = deposit.collection client = self.get_client(request) collection_id = collection.id collections = client.collections assert collections is not None if collection_id not in collections: raise DepositError( FORBIDDEN, f"Client {client.username} cannot access collection {collection_name}", ) headers = self._read_headers(request) if deposit is not None: self.restrict_access(request, headers, deposit) if headers.on_behalf_of: raise DepositError(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") self.additional_checks(request, headers, collection_name, deposit) return headers def restrict_access( self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit ) -> None: """Allow modifications on deposit with status 'partial' only, reject the rest. """ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = f"This deposit has status '{deposit.status}'" raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) def _basic_not_allowed_method(self, request: Request, method: str): raise DepositError( METHOD_NOT_ALLOWED, f"{method} method is not supported on this endpoint", ) def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: return self._basic_not_allowed_method(request, "GET") def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "POST") def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: return self._basic_not_allowed_method(request, "PUT") def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "DELETE") class APIGet(APIBase, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ deposit = get_deposit_by_id(deposit_id, collection_name) self.checks(request, collection_name, deposit) r = self.process_get(request, collection_name, deposit) status, content, content_type = r if content_type == "swh/generator": with content as path: return FileResponse( open(path, "rb"), status=status, content_type="application/tar" ) if content_type == "application/json": return HttpResponse( json.dumps(content), status=status, content_type=content_type ) return HttpResponse(content, status=status, content_type=content_type) @abstractmethod def process_get( self, request: Request, collection_name: str, deposit: Deposit ) -> Tuple[int, Any, str]: """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class APIPost(APIBase, metaclass=ABCMeta): """Mixin for class to support POST method. """ def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ if deposit_id is None: deposit = None else: deposit = get_deposit_by_id(deposit_id, collection_name) headers = self.checks(request, collection_name, deposit) status, iri_key, receipt = self.process_post( request, headers, collection_name, deposit ) return self._make_deposit_receipt( request, collection_name, status, iri_key, receipt, ) def _make_deposit_receipt( self, request, collection_name: str, status: int, iri_key: str, receipt: Receipt, ) -> HttpResponse: """Returns an HttpResponse with a SWORD Deposit receipt as content.""" # Build the IRIs in the receipt args = [collection_name, receipt.deposit_id] iris = { iri: request.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI] } context = { **attr.asdict(receipt), **iris, "packagings": ACCEPT_PACKAGINGS, } response = render( request, "deposit/deposit_receipt.xml", context=context, content_type="application/xml", status=status, ) response._headers["location"] = "Location", iris[iri_key] # type: ignore return response @abstractmethod def process_post( self, request, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit] = None, ) -> Tuple[int, str, Receipt]: """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_IRI, etc...) - Receipt """ pass class APIPut(APIBase, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ if deposit_id is None: deposit = None else: deposit = get_deposit_by_id(deposit_id, collection_name) headers = self.checks(request, collection_name, deposit) self.process_put(request, headers, collection_name, deposit) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, ) -> None: """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class APIDelete(APIBase, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ assert deposit_id is not None deposit = get_deposit_by_id(deposit_id, collection_name) self.checks(request, collection_name, deposit) self.process_delete(request, collection_name, deposit) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete( self, request: Request, collection_name: str, deposit: Deposit ) -> None: """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.APIUpdateArchive) """ pass diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 4efdc544..307fada2 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,89 +1,91 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict, List, Tuple from rest_framework.permissions import AllowAny +from rest_framework.views import APIView from swh.deposit import utils -from swh.deposit.api.common import AuthenticatedAPIView from ...config import METADATA_TYPE, APIConfig from ...models import Deposit, DepositRequest class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit: Deposit, request_type: str): """Given a deposit, yields its associated deposit_request Args: deposit: Deposit to list requests for request_type: 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit ).order_by("id") for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]: """Given a deposit, retrieve all metadata requests into one Dict and returns both that aggregated metadata dict and the list of raw_metdadata. Args: deposit: The deposit instance to extract metadata from Returns: Tuple of aggregated metadata dict, list of raw_metadata """ metadata: List[Dict[str, Any]] = [] raw_metadata: List[str] = [] for deposit_request in self._deposit_requests( deposit, request_type=METADATA_TYPE ): metadata.append(deposit_request.metadata) raw_metadata.append(deposit_request.raw_metadata) aggregated_metadata = utils.merge(*metadata) return (aggregated_metadata, raw_metadata) -class APIPrivateView(APIConfig, AuthenticatedAPIView): +class APIPrivateView(APIConfig, APIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ - authentication_classes = () - permission_classes = (AllowAny,) + def __init__(self): + super().__init__() + self.authentication_classes = () + self.permission_classes = (AllowAny,) def checks(self, req, collection_name, deposit=None): """Override default checks implementation to allow empty collection. """ headers = self._read_headers(req) self.additional_checks(req, headers, collection_name, deposit) return {"headers": headers} def get( self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): return super().get(request, collection_name, deposit_id) def put( self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): return super().put(request, collection_name, deposit_id) diff --git a/swh/deposit/api/service_document.py b/swh/deposit/api/service_document.py index 369ba942..47f4a15c 100644 --- a/swh/deposit/api/service_document.py +++ b/swh/deposit/api/service_document.py @@ -1,38 +1,42 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from django.urls import reverse from swh.deposit.api.common import ( ACCEPT_ARCHIVE_CONTENT_TYPES, ACCEPT_PACKAGINGS, APIBase, ) from swh.deposit.config import COL_IRI -from swh.deposit.models import DepositCollection +from swh.deposit.models import DepositClient, DepositCollection class ServiceDocumentAPI(APIBase): def get(self, request, *args, **kwargs): - client = request.user + if isinstance(request.user, DepositClient): + client = request.user + else: + client = DepositClient.objects.get(username=request.user) + collections = {} for col_id in client.collections: col = DepositCollection.objects.get(pk=col_id) col_uri = request.build_absolute_uri(reverse(COL_IRI, args=[col.name])) collections[col.name] = col_uri context = { "max_upload_size": self.config["max_upload_size"], "accept_packagings": ACCEPT_PACKAGINGS, "accept_content_types": ACCEPT_ARCHIVE_CONTENT_TYPES, "collections": collections, } return render( request, "deposit/service_document.xml", context, content_type="application/xml", ) diff --git a/swh/deposit/cli/admin.py b/swh/deposit/cli/admin.py index 792ce68c..aa0a73db 100644 --- a/swh/deposit/cli/admin.py +++ b/swh/deposit/cli/admin.py @@ -1,283 +1,285 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control from __future__ import annotations from typing import TYPE_CHECKING import click from swh.deposit.cli import deposit if TYPE_CHECKING: from swh.deposit.models import DepositCollection @deposit.group("admin") @click.option( "--config-file", "-C", default=None, type=click.Path(exists=True, dir_okay=False,), help="Optional extra configuration file.", ) @click.option( "--platform", default="development", type=click.Choice(["development", "production"]), help="development or production platform", ) @click.pass_context def admin(ctx, config_file: str, platform: str): """Server administration tasks (manipulate user or collections)""" from swh.deposit.config import setup_django_for # configuration happens here setup_django_for(platform, config_file=config_file) @admin.group("user") @click.pass_context def user(ctx): """Manipulate user.""" # configuration happens here pass def _create_collection(name: str) -> DepositCollection: """Create the collection with name if it does not exist. Args: name: collection name Returns: collection: the existing collection object """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection try: collection = DepositCollection.objects.get(name=name) click.echo(f"Collection '{name}' exists, skipping.") except DepositCollection.DoesNotExist: click.echo(f"Create collection '{name}'.") collection = DepositCollection.objects.create(name=name) click.echo(f"Collection '{name}' created.") return collection @user.command("create") @click.option("--username", required=True, help="User's name") @click.option("--password", help="(Deprecated) Desired user password (plain).") @click.option("--firstname", default="", help="User's first name") @click.option("--lastname", default="", help="User's last name") @click.option("--email", default="", help="User's email") @click.option("--collection", help="User's collection") @click.option("--provider-url", default="", help="Provider URL") @click.option("--domain", default="", help="The domain") @click.pass_context def user_create( ctx, username: str, password: str, firstname: str, lastname: str, email: str, collection: str, provider_url: str, domain: str, ): """Create a user with some needed information (password, collection) If the collection does not exist, the collection is then created alongside. The password is stored encrypted using django's utilities. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient # If collection is not provided, fallback to username if not collection: collection = username # create the collection if it does not exist collection_ = _create_collection(collection) # user create/update try: user = DepositClient.objects.get(username=username) # type: ignore click.echo(f"Update user '{username}'.") action_done = "updated" except DepositClient.DoesNotExist: click.echo(f"Create user '{username}'.") user = DepositClient(username=username) user.save() action_done = "created" + if password: + user.set_password(password) user.collections = [collection_.id] user.first_name = firstname user.last_name = lastname user.email = email user.is_active = True user.provider_url = provider_url user.domain = domain user.save() click.echo(f"User '{username}' {action_done}.") @user.command("list") @click.pass_context def user_list(ctx): """List existing users. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient users = DepositClient.objects.all() if not users: output = "Empty user list" else: output = "\n".join((user.username for user in users)) click.echo(output) @user.command("exists") @click.argument("username", required=True) @click.pass_context def user_exists(ctx, username: str): """Check if user exists. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient try: DepositClient.objects.get(username=username) # type: ignore click.echo(f"User {username} exists.") ctx.exit(0) except DepositClient.DoesNotExist: click.echo(f"User {username} does not exist.") ctx.exit(1) @admin.group("collection") @click.pass_context def collection(ctx): """Manipulate collections.""" pass @collection.command("create") @click.option("--name", required=True, help="Collection's name") @click.pass_context def collection_create(ctx, name): _create_collection(name) @collection.command("list") @click.pass_context def collection_list(ctx): """List existing collections. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection collections = DepositCollection.objects.all() if not collections: output = "Empty collection list" else: output = "\n".join((col.name for col in collections)) click.echo(output) @admin.group("deposit") @click.pass_context def adm_deposit(ctx): """Manipulate deposit.""" pass @adm_deposit.command("reschedule") @click.option("--deposit-id", required=True, help="Deposit identifier") @click.pass_context def adm_deposit_reschedule(ctx, deposit_id): """Reschedule the deposit loading This will: - check the deposit's status to something reasonable (failed or done). That means that the checks have passed alright but something went wrong during the loading (failed: loading failed, done: loading ok, still for some reasons as in bugs, we need to reschedule it) - reset the deposit's status to 'verified' (prior to any loading but after the checks which are fine) and removes the different archives' identifiers (swh-id, ...) - trigger back the loading task through the scheduler """ # to avoid loading too early django namespaces from datetime import datetime from swh.deposit.config import ( DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_VERIFIED, APIConfig, ) from swh.deposit.models import Deposit try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: click.echo(f"Deposit {deposit_id} does not exist.") ctx.exit(1) # Check the deposit is in a reasonable state accepted_statuses = [DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE] if deposit.status == DEPOSIT_STATUS_VERIFIED: click.echo(f"Deposit {deposit_id} already set for rescheduling.") ctx.exit(0) if deposit.status not in accepted_statuses: click.echo( f"Deposit {deposit_id} cannot be rescheduled (status: {deposit.status}).\n" "Rescheduling deposit is only accepted for deposit with status: " f"{', '.join(accepted_statuses)}." ) ctx.exit(1) task_id = deposit.load_task_id if not task_id: click.echo( f"Deposit {deposit_id} cannot be rescheduled. It misses the " "associated scheduler task id (field load_task_id)." ) ctx.exit(1) # Reset the deposit's state deposit.swhid = None deposit.swhid_context = None deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() # Schedule back the deposit loading task scheduler = APIConfig().scheduler scheduler.set_status_tasks( [task_id], status="next_run_not_scheduled", next_run=datetime.now() ) diff --git a/swh/deposit/settings/common.py b/swh/deposit/settings/common.py index 5360782c..38e26d79 100644 --- a/swh/deposit/settings/common.py +++ b/swh/deposit/settings/common.py @@ -1,115 +1,114 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django settings for swh project. Generated by 'django-admin startproject' using Django 1.10.7. For more information on this file, see https://docs.djangoproject.com/en/1.10/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/1.10/ref/settings/ """ import os # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ ALLOWED_HOSTS = ["127.0.0.1", "localhost"] # Application definition INSTALLED_APPS = [ "django.contrib.auth", "django.contrib.contenttypes", "django.contrib.staticfiles", "django.contrib.sessions", "django.contrib.messages", "django.contrib.postgres", # for JSONField, ArrayField "swh.deposit.apps.DepositConfig", ] MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.deposit.auth.WrapBasicAuthenticationResponseMiddleware", "swh.deposit.errors.DepositErrorMiddleware", ] ROOT_URLCONF = "swh.deposit.urls" TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", "DIRS": [], "APP_DIRS": True, "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", "django.template.context_processors.request", "django.contrib.auth.context_processors.auth", "django.contrib.messages.context_processors.messages", ], }, }, ] # Password validation # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",}, {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",}, {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",}, ] # Internationalization # https://docs.djangoproject.com/en/1.10/topics/i18n/ LANGUAGE_CODE = "en-us" TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.10/howto/static-files/ STATIC_URL = "/static/" REST_FRAMEWORK = { - "DEFAULT_AUTHENTICATION_CLASSES": ("swh.deposit.auth.KeycloakBasicAuthentication",), "EXCEPTION_HANDLER": "swh.deposit.exception.custom_exception_handler", } FILE_UPLOAD_HANDLERS = [ "django.core.files.uploadhandler.MemoryFileUploadHandler", "django.core.files.uploadhandler.TemporaryFileUploadHandler", ] CACHES = {"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache",}} diff --git a/swh/deposit/settings/production.py b/swh/deposit/settings/production.py index 61385d62..bc8d42ec 100644 --- a/swh/deposit/settings/production.py +++ b/swh/deposit/settings/production.py @@ -1,93 +1,110 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from swh.core import config from .common import * # noqa -from .common import ALLOWED_HOSTS +from .common import ALLOWED_HOSTS, CACHES ALLOWED_HOSTS += ["deposit.softwareheritage.org"] # Setup support for proxy headers USE_X_FORWARDED_HOST = True SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") DEBUG = False # Database # https://docs.djangoproject.com/en/1.10/ref/settings/#databases # https://docs.djangoproject.com/en/1.10/ref/settings/#std:setting-DATABASES # https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/#databases # Retrieve the deposit's configuration file # and check the required setup is ok # If not raise an error explaining the errors config_file = os.environ.get("SWH_CONFIG_FILENAME") if not config_file: raise ValueError( "Production: SWH_CONFIG_FILENAME must be set to the" " configuration file needed!" ) if not os.path.exists(config_file): raise ValueError( "Production: configuration file %s does not exist!" % (config_file,) ) conf = config.load_named_config(config_file) if not conf: raise ValueError("Production: configuration %s does not exist." % (config_file,)) -for key in ("scheduler", "private"): +for key in ("scheduler", "private", "authentication_provider"): if not conf.get(key): raise ValueError( "Production: invalid configuration; missing %s config entry." % (key,) ) ALLOWED_HOSTS += conf.get("allowed_hosts", []) private_conf = conf["private"] SECRET_KEY = private_conf["secret_key"] # Deactivate logging configuration as our uwsgi application is configured to do it # https://docs.djangoproject.com/en/2.2/ref/settings/#logging-config LOGGING_CONFIG = None # database db_conf = private_conf.get("db", {"name": "unset"}) db = { "ENGINE": "django.db.backends.postgresql", "NAME": db_conf["name"], } db_user = db_conf.get("user") if db_user: db["USER"] = db_user db_pass = db_conf.get("password") if db_pass: db["PASSWORD"] = db_pass db_host = db_conf.get("host") if db_host: db["HOST"] = db_host db_port = db_conf.get("port") if db_port: db["PORT"] = db_port # https://docs.djangoproject.com/en/1.10/ref/settings/#databases DATABASES = { "default": db, } # Upload user directory # https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-MEDIA_ROOT MEDIA_ROOT = private_conf.get("media_root") + +# Default authentication is http basic +authentication = conf["authentication_provider"] + +# With the following, we delegate the authentication mechanism to keycloak +if authentication == "keycloak": + # Optional cache server + server_cache = conf.get("cache_uri") + if server_cache: + CACHES.update( + { + "default": { + "BACKEND": "django.core.cache.backends.memcached.MemcachedCache", + "LOCATION": server_cache, + } + } + ) diff --git a/swh/deposit/settings/testing.py b/swh/deposit/settings/testing.py index ca0fe59e..ea67376c 100644 --- a/swh/deposit/settings/testing.py +++ b/swh/deposit/settings/testing.py @@ -1,42 +1,46 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from .common import * # noqa from .common import ALLOWED_HOSTS from .development import * # noqa from .development import INSTALLED_APPS # django setup ALLOWED_HOSTS += ["testserver"] INSTALLED_APPS += ["pytest_django"] # https://docs.djangoproject.com/en/1.10/ref/settings/#logging LOGGING = { "version": 1, "disable_existing_loggers": True, "formatters": { "standard": { "format": "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa "datefmt": "%d/%b/%Y %H:%M:%S", }, }, "handlers": { "console": { "level": "ERROR", "class": "logging.StreamHandler", "formatter": "standard", }, }, "loggers": {"swh.deposit": {"handlers": ["console"], "level": "ERROR",},}, } # https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-MEDIA_ROOT # SECURITY WARNING: Override this in the production.py module MEDIA_ROOT = "/tmp/swh-deposit/test/uploads/" FILE_UPLOAD_HANDLERS = [ "django.core.files.uploadhandler.MemoryFileUploadHandler", ] + +REST_FRAMEWORK = { + "EXCEPTION_HANDLER": "swh.deposit.exception.custom_exception_handler", +} diff --git a/swh/deposit/tests/api/test_basic_auth.py b/swh/deposit/tests/api/test_basic_auth.py new file mode 100644 index 00000000..fe214994 --- /dev/null +++ b/swh/deposit/tests/api/test_basic_auth.py @@ -0,0 +1,32 @@ +# Copyright (C) 2017-2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +"""Module to check at least one basic authentication works. + +""" + +from django.urls import reverse_lazy as reverse +import pytest + +from swh.deposit.config import SD_IRI + +from .test_service_document import check_response + + +@pytest.fixture() +def deposit_config(common_deposit_config): + return { + **common_deposit_config, + "authentication_provider": "basic", + } + + +def test_service_document_basic(basic_authenticated_client): + """With authentication, service document list user's collection + + """ + url = reverse(SD_IRI) + response = basic_authenticated_client.get(url) + check_response(response, basic_authenticated_client.deposit_client.username) diff --git a/swh/deposit/tests/api/test_service_document.py b/swh/deposit/tests/api/test_service_document.py index 666a151f..0ee77b75 100644 --- a/swh/deposit/tests/api/test_service_document.py +++ b/swh/deposit/tests/api/test_service_document.py @@ -1,82 +1,82 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse_lazy as reverse from rest_framework import status from swh.deposit.config import SD_IRI def test_service_document_no_auth_fails(client): """Without authentication, service document endpoint should return 401 """ url = reverse(SD_IRI) response = client.get(url) assert response.status_code == status.HTTP_401_UNAUTHORIZED def test_service_document_no_auth_with_http_auth_should_not_break(client): """Without auth, sd endpoint through browser should return 401 """ url = reverse(SD_IRI) response = client.get(url, HTTP_ACCEPT="text/html,application/xml;q=9,*/*,q=8") assert response.status_code == status.HTTP_401_UNAUTHORIZED def test_service_document(authenticated_client): """With authentication, service document list user's collection """ url = reverse(SD_IRI) response = authenticated_client.get(url) check_response(response, authenticated_client.deposit_client.username) def test_service_document_with_http_accept_header(authenticated_client): """With authentication, with browser, sd list user's collection """ url = reverse(SD_IRI) response = authenticated_client.get( url, HTTP_ACCEPT="text/html,application/xml;q=9,*/*,q=8" ) check_response(response, authenticated_client.deposit_client.username) def check_response(response, username): - assert response.status_code == status.HTTP_200_OK + assert response.status_code == status.HTTP_200_OK, f"Response: {response.content}" assert ( response.content.decode("utf-8") == """ 2.0 %s The Software Heritage (SWH) Archive %s Software Collection application/zip application/x-tar Collection Policy Software Heritage Archive Collect, Preserve, Share false false http://purl.org/net/sword/package/SimpleZip http://testserver/1/%s/ %s """ % (500, username, username, username, username) ) # noqa diff --git a/swh/deposit/tests/cli/test_admin.py b/swh/deposit/tests/cli/test_admin.py index 32afd670..69860ba9 100644 --- a/swh/deposit/tests/cli/test_admin.py +++ b/swh/deposit/tests/cli/test_admin.py @@ -1,317 +1,319 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.deposit.cli.admin import admin as cli from swh.deposit.config import ( DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_VERIFIED, ) from swh.deposit.models import DepositClient, DepositCollection from swh.scheduler.utils import create_oneshot_task_dict @pytest.fixture(autouse=True) def enable_db_access_for_all_tests(db): pass def test_cli_admin_user_list_nothing(cli_runner): result = cli_runner.invoke(cli, ["user", "list",]) assert result.exit_code == 0, f"Unexpected output: {result.output}" assert result.output == "Empty user list\n" def test_cli_admin_user_list_with_users(cli_runner, deposit_user): result = cli_runner.invoke(cli, ["user", "list",]) assert result.exit_code == 0, f"Unexpected output: {result.output}" assert result.output == f"{deposit_user.username}\n" # only 1 user def test_cli_admin_collection_list_nothing(cli_runner): result = cli_runner.invoke(cli, ["collection", "list",]) assert result.exit_code == 0, f"Unexpected output: {result.output}" assert result.output == "Empty collection list\n" def test_cli_admin_collection_list_with_collections(cli_runner, deposit_collection): from swh.deposit.tests.conftest import create_deposit_collection new_collection = create_deposit_collection("something") result = cli_runner.invoke(cli, ["collection", "list",]) assert result.exit_code == 0, f"Unexpected output: {result.output}" collections = "\n".join([deposit_collection.name, new_collection.name]) assert result.output == f"{collections}\n" def test_cli_admin_user_exists_unknown(cli_runner): result = cli_runner.invoke(cli, ["user", "exists", "unknown"]) assert result.exit_code == 1, f"Unexpected output: {result.output}" assert result.output == "User unknown does not exist.\n" def test_cli_admin_user_exists(cli_runner, deposit_user): result = cli_runner.invoke(cli, ["user", "exists", deposit_user.username]) assert result.exit_code == 0, f"Unexpected output: {result.output}" assert result.output == f"User {deposit_user.username} exists.\n" def test_cli_admin_create_collection(cli_runner): collection_name = "something" try: DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: pass result = cli_runner.invoke( cli, ["collection", "create", "--name", collection_name,] ) assert result.exit_code == 0, f"Unexpected output: {result.output}" collection = DepositCollection.objects.get(name=collection_name) assert collection is not None assert ( result.output == f"""Create collection '{collection_name}'. Collection '{collection_name}' created. """ ) result2 = cli_runner.invoke( cli, ["collection", "create", "--name", collection_name,] ) assert result2.exit_code == 0, f"Unexpected output: {result.output}" assert ( result2.output == f"""Collection '{collection_name}' exists, skipping. """ ) def test_cli_admin_user_create(cli_runner): user_name = "user" collection_name = user_name try: DepositClient.objects.get(username=user_name) except DepositClient.DoesNotExist: pass try: DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: pass result = cli_runner.invoke( cli, ["user", "create", "--username", user_name, "--password", "password",] ) assert result.exit_code == 0, f"Unexpected output: {result.output}" user = DepositClient.objects.get(username=user_name) assert user is not None collection = DepositCollection.objects.get(name=collection_name) assert collection is not None assert ( result.output == f"""Create collection '{user_name}'. Collection '{collection_name}' created. Create user '{user_name}'. User '{user_name}' created. """ ) assert collection.name == collection_name assert user.username == user_name first_password = user.password assert first_password is not None assert user.collections == [collection.id] assert user.is_active is True assert user.domain == "" assert user.provider_url == "" assert user.email == "" assert user.first_name == "" assert user.last_name == "" # create a user that already exists result2 = cli_runner.invoke( cli, [ "user", "create", "--username", "user", "--password", "another-password", # changing password "--collection", collection_name, # specifying the collection this time "--firstname", "User", "--lastname", "no one", "--email", "user@org.org", "--provider-url", "http://some-provider.org", "--domain", "domain", ], ) assert result2.exit_code == 0, f"Unexpected output: {result2.output}" user = DepositClient.objects.get(username=user_name) assert user is not None assert user.username == user_name assert user.collections == [collection.id] assert user.is_active is True second_password = user.password assert second_password is not None - assert second_password == first_password, "Password not changed (no longer used)" + # For the transition period, we can choose either basic or keycloak so we need to be + # able to still define a password (basic), so there it's updated. + assert second_password != first_password, "Password changed" assert user.domain == "domain" assert user.provider_url == "http://some-provider.org" assert user.email == "user@org.org" assert user.first_name == "User" assert user.last_name == "no one" assert ( result2.output == f"""Collection '{collection_name}' exists, skipping. Update user '{user_name}'. User '{user_name}' updated. """ ) def test_cli_admin_reschedule_unknown_deposit(cli_runner): """Rescheduling unknown deposit should report failure """ unknown_deposit_id = 666 from swh.deposit.models import Deposit try: Deposit.objects.get(id=unknown_deposit_id) except Deposit.DoesNotExist: pass result = cli_runner.invoke( cli, ["deposit", "reschedule", "--deposit-id", unknown_deposit_id] ) assert result.output == f"Deposit {unknown_deposit_id} does not exist.\n" assert result.exit_code == 1 def test_cli_admin_reschedule_verified_deposit(cli_runner, complete_deposit): """Rescheduling verified deposit should do nothing but report """ deposit = complete_deposit deposit.status = "verified" deposit.save() result = cli_runner.invoke( cli, ["deposit", "reschedule", "--deposit-id", deposit.id] ) assert result.output == f"Deposit {deposit.id} already set for rescheduling.\n" assert result.exit_code == 0 @pytest.mark.parametrize( "status_to_check", [DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_DEPOSITED] ) def test_cli_admin_reschedule_unaccepted_deposit_status( status_to_check, cli_runner, complete_deposit ): """Rescheduling verified deposit should do nothing but report """ deposit = complete_deposit deposit.status = status_to_check # not accepted status will fail the check deposit.save() result = cli_runner.invoke( cli, ["deposit", "reschedule", "--deposit-id", deposit.id] ) assert result.output == ( f"Deposit {deposit.id} cannot be rescheduled (status: {deposit.status}).\n" "Rescheduling deposit is only accepted for deposit with status: done, failed.\n" ) assert result.exit_code == 1 def test_cli_admin_reschedule_missing_task_id(cli_runner, complete_deposit): """Rescheduling deposit with no load_task_id cannot work. """ deposit = complete_deposit deposit.load_task_id = "" # drop the load-task-id so it fails the check deposit.save() result = cli_runner.invoke( cli, ["deposit", "reschedule", "--deposit-id", deposit.id] ) assert result.output == ( f"Deposit {deposit.id} cannot be rescheduled. It misses the " "associated scheduler task id (field load_task_id).\n" ) assert result.exit_code == 1 def test_cli_admin_reschedule_nominal(cli_runner, complete_deposit, swh_scheduler): """Rescheduling deposit with no load_task_id cannot work. """ deposit = complete_deposit from swh.deposit.models import Deposit # create a task to keep a reference on it task = create_oneshot_task_dict( "load-deposit", url=deposit.origin_url, deposit_id=deposit.id, retries_left=3 ) scheduled_task = swh_scheduler.create_tasks([task])[0] # disable it swh_scheduler.set_status_tasks([scheduled_task["id"]], status="disabled") # Now update the deposit state with some swhid and relevant load_task_id deposit = complete_deposit deposit.load_task_id = scheduled_task["id"] deposit.swhid = "swh:1:dir:02ed6084fb0e8384ac58980e07548a547431cf74" deposit.swhid_context = f"{deposit.swhid};origin=https://url/external-id" deposit.save() # Reschedule it result = cli_runner.invoke( cli, ["deposit", "reschedule", "--deposit-id", deposit.id] ) assert result.exit_code == 0 # Now, ensure the deposit and the associated task are in the right shape deposit = Deposit.objects.get(id=deposit.id) # got reset to a state which allows rescheduling assert deposit.id assert deposit.swhid is None assert deposit.swhid_context is None assert deposit.status == DEPOSIT_STATUS_VERIFIED task = swh_scheduler.search_tasks(task_id=deposit.load_task_id)[0] assert task["status"] == "next_run_not_scheduled" diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py index b591cf90..58d7f5d5 100644 --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -1,583 +1,608 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from copy import deepcopy from functools import partial from io import BytesIO import os import re from typing import TYPE_CHECKING, Dict, Mapping from django.test.utils import setup_databases # type: ignore from django.urls import reverse_lazy as reverse import psycopg2 from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT import pytest from rest_framework import status from rest_framework.test import APIClient import yaml from swh.auth.pytest_plugin import keycloak_mock_factory from swh.core.config import read from swh.core.pytest_plugin import get_response_cb from swh.deposit.auth import DEPOSIT_PERMISSION from swh.deposit.config import ( COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_VERIFIED, SE_IRI, setup_django_for, ) from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import ( create_arborescence_archive, post_archive, post_atom, ) from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID from swh.scheduler import get_scheduler if TYPE_CHECKING: from swh.deposit.models import Deposit, DepositClient, DepositCollection # mypy is asked to ignore the import statement above because setup_databases # is not part of the d.t.utils.__all__ variable. USERNAME = "test" EMAIL = "test@example.org" COLLECTION = "test" TEST_USER = { "username": USERNAME, - "password": "", + "password": "pass", "email": EMAIL, "provider_url": "https://hal-test.archives-ouvertes.fr/", "domain": "archives-ouvertes.fr/", "collection": {"name": COLLECTION}, } USER_INFO = { "name": USERNAME, "email": EMAIL, "email_verified": False, "family_name": "", "given_name": "", "groups": [], "preferred_username": USERNAME, "sub": "ffffffff-bbbb-4444-aaaa-14f61e6b7200", } USERNAME2 = "test2" EMAIL2 = "test@example.org" COLLECTION2 = "another-collection" TEST_USER2 = { "username": USERNAME2, "password": "", "email": EMAIL2, "provider_url": "https://hal-test.archives-ouvertes.example/", "domain": "archives-ouvertes.example/", "collection": {"name": COLLECTION2}, } KEYCLOAK_SERVER_URL = "https://auth.swh.org/SWHTest" KEYCLOAK_REALM_NAME = "SWHTest" CLIENT_ID = "swh-deposit" keycloak_mock_auth_success = keycloak_mock_factory( server_url=KEYCLOAK_SERVER_URL, realm_name=KEYCLOAK_REALM_NAME, client_id=CLIENT_ID, auth_success=True, user_info=USER_INFO, user_permissions=[DEPOSIT_PERMISSION], ) keycloak_mock_auth_failure = keycloak_mock_factory( server_url=KEYCLOAK_SERVER_URL, realm_name=KEYCLOAK_REALM_NAME, client_id=CLIENT_ID, auth_success=False, ) def pytest_configure(): setup_django_for("testing") @pytest.fixture def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with put/post methods """ cb = partial(get_response_cb, datadir=datadir) requests_mock_datadir.put(re.compile("https://"), body=cb) requests_mock_datadir.post(re.compile("https://"), body=cb) return requests_mock_datadir -@pytest.fixture() -def deposit_config(swh_scheduler_config, swh_storage_backend_config): +@pytest.fixture +def common_deposit_config(swh_scheduler_config, swh_storage_backend_config): return { "max_upload_size": 500, "extraction_dir": "/tmp/swh-deposit/test/extraction-dir", "checks": False, "scheduler": {"cls": "local", **swh_scheduler_config,}, "storage": swh_storage_backend_config, "storage_metadata": swh_storage_backend_config, "swh_authority_url": "http://deposit.softwareheritage.example/", + } + + +@pytest.fixture() +def deposit_config(common_deposit_config): + return { + **common_deposit_config, + "authentication_provider": "keycloak", "keycloak": { "server_url": KEYCLOAK_SERVER_URL, "realm_name": KEYCLOAK_REALM_NAME, }, } @pytest.fixture() def deposit_config_path(tmp_path, monkeypatch, deposit_config): conf_path = os.path.join(tmp_path, "deposit.yml") with open(conf_path, "w") as f: f.write(yaml.dump(deposit_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path @pytest.fixture(autouse=True) def deposit_autoconfig(deposit_config_path): """Enforce config for deposit classes inherited from APIConfig.""" cfg = read(deposit_config_path) if "scheduler" in cfg: # scheduler setup: require the check-deposit and load-deposit tasks scheduler = get_scheduler(**cfg["scheduler"]) task_types = [ { "type": "check-deposit", "backend_name": "swh.deposit.loader.tasks.ChecksDepositTsk", "description": "Check deposit metadata/archive before loading", "num_retries": 3, }, { "type": "load-deposit", "backend_name": "swh.loader.package.deposit.tasks.LoadDeposit", "description": "Loading deposit archive into swh archive", "num_retries": 3, }, ] for task_type in task_types: scheduler.create_task_type(task_type) @pytest.fixture(scope="session") def django_db_setup(request, django_db_blocker, postgresql_proc): from django.conf import settings settings.DATABASES["default"].update( { ("ENGINE", "django.db.backends.postgresql"), ("NAME", "tests"), ("USER", postgresql_proc.user), # noqa ("HOST", postgresql_proc.host), # noqa ("PORT", postgresql_proc.port), # noqa } ) with django_db_blocker.unblock(): setup_databases( verbosity=request.config.option.verbose, interactive=False, keepdb=False ) def execute_sql(sql): """Execute sql to postgres db""" with psycopg2.connect(database="postgres") as conn: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cur = conn.cursor() cur.execute(sql) @pytest.fixture(autouse=True, scope="session") def swh_proxy(): """Automatically inject this fixture in all tests to ensure no outside connection takes place. """ os.environ["http_proxy"] = "http://localhost:999" os.environ["https_proxy"] = "http://localhost:999" def create_deposit_collection(collection_name: str): """Create a deposit collection with name collection_name """ from swh.deposit.models import DepositCollection try: collection = DepositCollection._default_manager.get(name=collection_name) except DepositCollection.DoesNotExist: collection = DepositCollection(name=collection_name) collection.save() return collection def deposit_collection_factory(collection_name): @pytest.fixture def _deposit_collection(db, collection_name=collection_name): return create_deposit_collection(collection_name) return _deposit_collection deposit_collection = deposit_collection_factory(COLLECTION) deposit_another_collection = deposit_collection_factory(COLLECTION2) def _create_deposit_user( collection: "DepositCollection", user_data: Dict ) -> "DepositClient": """Create/Return the test_user "test" + For basic authentication, this will save a password. + This is not required for keycloak authentication scheme. + """ from swh.deposit.models import DepositClient user_data_d = deepcopy(user_data) user_data_d.pop("collection", None) + passwd = user_data_d.pop("password", None) user, _ = DepositClient.objects.get_or_create( # type: ignore username=user_data_d["username"], defaults={**user_data_d, "collections": [collection.id]}, ) + if passwd: + user.set_password(passwd) + user.save() + return user @pytest.fixture def deposit_user(db, deposit_collection): return _create_deposit_user(deposit_collection, TEST_USER) @pytest.fixture def deposit_another_user(db, deposit_another_collection): return _create_deposit_user(deposit_another_collection, TEST_USER2) @pytest.fixture def anonymous_client(): """Create an anonymous client (no credentials during queries to the deposit) """ return APIClient() # <- drf's client def mock_keycloakopenidconnect(mocker, keycloak_mock): """Mock swh.deposit.auth.KeycloakOpenIDConnect to return the keycloak_mock """ mock = mocker.patch("swh.deposit.auth.KeycloakOpenIDConnect") mock.from_configfile.return_value = keycloak_mock return mock @pytest.fixture def mock_keycloakopenidconnect_ok(mocker, keycloak_mock_auth_success): """Mock keycloak so it always accepts connection for user with the right permissions """ return mock_keycloakopenidconnect(mocker, keycloak_mock_auth_success) @pytest.fixture def mock_keycloakopenidconnect_ko(mocker, keycloak_mock_auth_failure): """Mock keycloak so it always refuses connections.""" return mock_keycloakopenidconnect(mocker, keycloak_mock_auth_failure) @pytest.fixture def unauthorized_client(anonymous_client, mock_keycloakopenidconnect_ko): """Create an unauthorized client (will see their authentication fail) """ return anonymous_client -def _create_authenticated_client(client, user): +def _create_authenticated_client(client, user, password=None): """Return a client whose credentials will be proposed to the deposit server. This also patched the client instance to keep a reference on the associated deposit_user. """ - _token = "%s:%s" % (user.username, "irrelevant-in-test-context") + if not password: + password = "irrelevant-if-not-set" + _token = "%s:%s" % (user.username, password) token = base64.b64encode(_token.encode("utf-8")) authorization = "Basic %s" % token.decode("utf-8") client.credentials(HTTP_AUTHORIZATION=authorization) client.deposit_client = user yield client client.logout() +@pytest.fixture +def basic_authenticated_client(anonymous_client, deposit_user): + yield from _create_authenticated_client( + anonymous_client, deposit_user, password=TEST_USER["password"] + ) + + @pytest.fixture def authenticated_client(mock_keycloakopenidconnect_ok, anonymous_client, deposit_user): yield from _create_authenticated_client(anonymous_client, deposit_user) @pytest.fixture def insufficient_perm_client( mocker, keycloak_mock_auth_success, anonymous_client, deposit_user ): """keycloak accepts connection but client returned has no deposit permission, so access is not allowed. """ keycloak_mock_auth_success.user_permissions = [] mock_keycloakopenidconnect(mocker, keycloak_mock_auth_success) yield from _create_authenticated_client(anonymous_client, deposit_user) @pytest.fixture def sample_archive(tmp_path): """Returns a sample archive """ tmp_path = str(tmp_path) # pytest version limitation in previous version archive = create_arborescence_archive( tmp_path, "archive1", "file1", b"some content in file" ) return archive @pytest.fixture def atom_dataset(datadir) -> Mapping[str, str]: """Compute the paths to atom files. Returns: Dict of atom name per content (bytes) """ atom_path = os.path.join(datadir, "atom") data = {} for filename in os.listdir(atom_path): filepath = os.path.join(atom_path, filename) with open(filepath, "rb") as f: raw_content = f.read().decode("utf-8") # Keep the filename without extension atom_name = filename.split(".")[0] data[atom_name] = raw_content return data def internal_create_deposit( client: "DepositClient", collection: "DepositCollection", external_id: str, status: str, ) -> "Deposit": """Create a deposit for a given collection with internal tool """ from swh.deposit.models import Deposit deposit = Deposit( client=client, external_id=external_id, status=status, collection=collection ) deposit.save() return deposit def create_deposit( client, collection_name: str, sample_archive, external_id: str, deposit_status=DEPOSIT_STATUS_DEPOSITED, in_progress=False, ): """Create a skeleton shell deposit """ url = reverse(COL_IRI, args=[collection_name]) # when response = post_archive( client, url, sample_archive, HTTP_SLUG=external_id, HTTP_IN_PROGRESS=str(in_progress).lower(), ) # then assert response.status_code == status.HTTP_201_CREATED, response.content.decode() from swh.deposit.models import Deposit response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit._default_manager.get(id=deposit_id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def create_binary_deposit( authenticated_client, collection_name: str, deposit_status: str = DEPOSIT_STATUS_DEPOSITED, atom_dataset: Mapping[str, bytes] = {}, **kwargs, ): """Create a deposit with both metadata and archive set. Then alters its status to `deposit_status`. """ deposit = create_deposit( authenticated_client, collection_name, deposit_status=DEPOSIT_STATUS_PARTIAL, **kwargs, ) origin_url = deposit.client.provider_url + deposit.external_id response = post_atom( authenticated_client, reverse(SE_IRI, args=[collection_name, deposit.id]), data=atom_dataset["entry-data0"] % origin_url, HTTP_IN_PROGRESS="true", ) assert response.status_code == status.HTTP_201_CREATED assert deposit.status == DEPOSIT_STATUS_PARTIAL from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit.id) assert deposit.status == deposit_status return deposit def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED, in_progress=False): """Build deposit with a specific status """ @pytest.fixture() def _deposit( sample_archive, deposit_collection, authenticated_client, deposit_status=deposit_status, ): external_id = "external-id-%s" % deposit_status return create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id=external_id, deposit_status=deposit_status, in_progress=in_progress, ) return _deposit deposited_deposit = deposit_factory() rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED) partial_deposit = deposit_factory( deposit_status=DEPOSIT_STATUS_PARTIAL, in_progress=True ) verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED) completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS) failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE) @pytest.fixture def partial_deposit_with_metadata( sample_archive, deposit_collection, authenticated_client, atom_dataset ): """Returns deposit with archive and metadata provided, status 'partial' """ return create_binary_deposit( authenticated_client, deposit_collection.name, sample_archive=sample_archive, external_id="external-id-partial", in_progress=True, deposit_status=DEPOSIT_STATUS_PARTIAL, atom_dataset=atom_dataset, ) @pytest.fixture def partial_deposit_only_metadata( deposit_collection, authenticated_client, atom_dataset ): response = post_atom( authenticated_client, reverse(COL_IRI, args=[deposit_collection.name]), data=atom_dataset["entry-data1"], HTTP_SLUG="external-id-partial", HTTP_IN_PROGRESS=True, ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) deposit_id = response_content["swh:deposit_id"] from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_PARTIAL return deposit @pytest.fixture def complete_deposit(sample_archive, deposit_collection, authenticated_client): """Returns a completed deposit (load success) """ deposit = create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id="external-id-complete", deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS, ) origin = "https://hal.archives-ouvertes.fr/hal-01727745" directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b" revision_id = hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10") snapshot_id = hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0") deposit.swhid = f"swh:1:dir:{directory_id}" deposit.swhid_context = str( QualifiedSWHID( object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(directory_id), origin=origin, visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id), anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id), path=b"/", ) ) deposit.save() return deposit @pytest.fixture() def tmp_path(tmp_path): return str(tmp_path) # issue with oldstable's pytest version diff --git a/swh/deposit/tests/test_init.py b/swh/deposit/tests/test_init.py index 59a145a1..da24513c 100644 --- a/swh/deposit/tests/test_init.py +++ b/swh/deposit/tests/test_init.py @@ -1,10 +1,26 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import pytest + +from swh.deposit.api.common import APIBase + def test_version(): from swh.deposit import __version__ assert __version__ is not None + + +@pytest.fixture +def deposit_config(common_deposit_config): + assert "authentication_provider" not in common_deposit_config + return common_deposit_config + + +def test_api_base_misconfigured(): + """No authentication_provider key in configuration should raise""" + with pytest.raises(ValueError, match="authentication_provider"): + APIBase()