diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index cb972894..c5cc631c 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,893 +1,884 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from abc import ABCMeta, abstractmethod from django.urls import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.authentication import BasicAuthentication -from rest_framework.permissions import IsAuthenticated, AllowAny +from rest_framework.permissions import IsAuthenticated from rest_framework.views import APIView from swh.model import hashutil from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE ) from ..errors import ( MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, make_error_response_from_dict, FORBIDDEN, NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, ParserError, PARSING_ERROR ) from ..models import ( Deposit, DepositRequest, DepositCollection, DepositClient ) from ..parsers import parse_xml ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar'] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes = (BasicAuthentication, ) permission_classes = (IsAuthenticated, ) -class SWHPrivateAPIView(SWHAPIView): - """Mixin intended as private api (so no authentication) based API view - (for the private ones). - - """ - authentication_classes = () - permission_classes = (AllowAny, ) - - class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). 
Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type content_length = meta.get('CONTENT_LENGTH') if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get('HTTP_IN_PROGRESS', False) content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') if isinstance(in_progress, str): in_progress = in_progress.lower() == 'true' content_md5sum = meta.get('HTTP_CONTENT_MD5') if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get('HTTP_PACKAGING') slug = meta.get('HTTP_SLUG') on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') return { 'content-type': content_type, 'content-length': content_length, 'in-progress': in_progress, 'content-disposition': content_disposition, 'content-md5sum': content_md5sum, 'packaging': packaging, 'slug': slug, 'on-behalf-of': on_behalf_of, 'metadata-relevant': metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put(self, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) deposit_parent = Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa except Deposit.DoesNotExist: deposit_parent = None deposit = Deposit(collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type deposit.save() return deposit def _deposit_request_put(self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False): """Save a deposit request with metadata attached to a deposit. 
Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: DepositRequest.objects.filter( deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter( deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) DepositRequest.objects.filter( deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) if deposit.collection.name != collection_name: summary = 'Cannot delete a deposit from another collection' description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config['max_upload_size']: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, 'Upload size limit exceeded (max %s bytes).' % self.config['max_upload_size'], 'Please consider sending the archive in ' 'multiple steps.') length = filehandler.size if length != content_length: return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, 'Wrong length') if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, 'Wrong md5 hash', 'The checksum sent %s and the actual checksum ' '%s does not match.' % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum))) return None def _binary_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Binary upload routine. 
Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers['content-length'] if not content_length: return make_error_dict( BAD_REQUEST, 'CONTENT_LENGTH header is mandatory', 'For archive deposit, the ' 'CONTENT_LENGTH header must be sent.') content_disposition = headers['content-disposition'] if not content_disposition: return make_error_dict( BAD_REQUEST, 'CONTENT_DISPOSITION header is mandatory', 'For archive deposit, the ' 'CONTENT_DISPOSITION header must be sent.') packaging = headers['packaging'] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, 'Only packaging %s is supported' % ACCEPT_PACKAGINGS, 'The packaging provided %s is not supported' % packaging) filehandler = req.FILES['file'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum'], content_length) if precondition_status_response: return precondition_status_response external_id = headers['slug'] # actual storage of data archive_metadata = filehandler deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'status': deposit.status, 'archive': filehandler.name, } def _read_metadata(self, metadata_stream): """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. 
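# Illustrative set of request headers satisfying the checks in _binary_upload()
# above; all values are examples. Content-Length and Content-Disposition are
# mandatory, Packaging (if given) must be SimpleZip, Slug becomes the deposit's
# external identifier and In-Progress controls the partial/deposited status.
BINARY_DEPOSIT_HEADERS = {
    'Content-Type': 'application/zip',   # or 'application/x-tar'
    'Content-Length': '1024',            # checked against the archive size
    'Content-Disposition': 'attachment; filename=archive.zip',
    'Content-MD5': '9e107d9d372bb6826bd81d3542a419d6',  # hex md5 of the body (illustrative)
    'Packaging': 'http://purl.org/net/sword/package/SimpleZip',
    'Slug': 'some-external-id',
    'In-Progress': 'false',
}
# Illustrative usage:
#   requests.post(col_iri, data=open('archive.zip', 'rb'),
#                 headers=BINARY_DEPOSIT_HEADERS, auth=(user, password))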
replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ external_id = headers['slug'] content_types_present = set() data = { 'application/zip': None, # expected either zip 'application/x-tar': None, # or x-tar 'application/atom+xml': None, } for key, value in req.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, 'Only 1 application/zip (or application/x-tar) archive ' 'and 1 atom+xml entry is supported (as per sword2.0 ' 'specification)', 'You provided more than 1 application/(zip|x-tar) ' 'or more than 1 application/atom+xml content-disposition ' 'header in the multipart deposit') content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, 'You must provide both 1 application/zip (or ' 'application/x-tar) and 1 atom+xml entry for multipart ' 'deposit', 'You need to provide only 1 application/(zip|x-tar) ' 'and 1 application/atom+xml content-disposition header ' 'in the multipart deposit') filehandler = data['application/zip'] if not filehandler: filehandler = data['application/x-tar'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum']) if precondition_status_response: return precondition_status_response try: raw_metadata, metadata = self._read_metadata( data['application/atom+xml']) except ParserError: return make_error_dict( PARSING_ERROR, 'Malformed xml metadata', "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.") # actual storage of data deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': filehandler.name, 'status': deposit.status, } def _atom_entry(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Atom entry deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. 
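# Illustrative Atom entry body for the _atom_entry() route below. The exact
# metadata fields are up to the client; only the Atom namespace and the
# optional external_identifier element are relied upon by this code:
ATOM_ENTRY_EXAMPLE = b"""<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom">
  <title>Awesome Compiler</title>
  <external_identifier>some-external-id</external_identifier>
  <author>someone</author>
</entry>
"""
# parse_xml(ATOM_ENTRY_EXAMPLE) would yield a dict-like mapping from which
# metadata.get('external_identifier', headers['slug']) picks the external id.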
Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(req.data) except ParserError: return make_error_dict( BAD_REQUEST, 'Malformed xml metadata', "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.") if not metadata: return make_error_dict( BAD_REQUEST, 'Empty body request is not supported', 'Atom entry deposit is supposed to send for metadata. ' 'If the body is empty, there is no metadata.') external_id = metadata.get('external_identifier', headers['slug']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': None, 'status': deposit.status, } def _empty_post(self, req, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { 'deposit_id': deposit_id, 'deposit_date': deposit.complete_date, 'status': deposit.status, 'archive': None, } def _make_iris(self, req, collection_name, deposit_id): """Define the IRI endpoints Args: req (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ args = [collection_name, deposit_id] return { iri: req.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } def additional_checks(self, req, headers, collection_name, deposit_id=None): """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. 
""" return {} def checks(self, req, collection_name, deposit_id=None): try: self._collection = DepositCollection.objects.get( name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, 'Unknown collection name %s' % collection_name) username = req.user.username if username: # unauthenticated request can have the username empty try: self._client = DepositClient.objects.get(username=username) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, 'Unknown client name %s' % username) if self._collection.id not in self._client.collections: return make_error_dict( FORBIDDEN, 'Client %s cannot access collection %s' % ( username, collection_name)) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'Deposit with id %s does not exist' % deposit_id) checks = self.restrict_access(req, deposit) if checks: return checks headers = self._read_headers(req) if headers['on-behalf-of']: return make_error_dict(MEDIATION_NOT_ALLOWED, 'Mediation is not supported.') checks = self.additional_checks(req, headers, collection_name, deposit_id) if 'error' in checks: return checks return {'headers': headers} def restrict_access(self, req, deposit=None): if deposit: if (req.method != 'GET' and deposit.status != DEPOSIT_STATUS_PARTIAL): summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = "This deposit has status '%s'" % deposit.status return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) def _basic_not_allowed_method(self, req, method): return make_error_response( req, METHOD_NOT_ALLOWED, '%s method is not supported on this endpoint' % method) def get(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'GET') def post(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'POST') def put(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'PUT') def delete(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'DELETE') class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get(self, req, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) r = self.process_get( req, collection_name, deposit_id) if isinstance(r, tuple): status, content, content_type = r return HttpResponse(content, status=status, content_type=content_type) return r @abstractmethod def process_get(self, req, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def post(self, req, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 
400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] _status, _iri_key, data = self.process_post( req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) data['packagings'] = ACCEPT_PACKAGINGS iris = self._make_iris(req, collection_name, data['deposit_id']) data.update(iris) response = render(req, 'deposit/deposit_receipt.xml', context=data, content_type='application/xml', status=_status) response._headers['location'] = 'Location', data[_iri_key] return response @abstractmethod def process_post(self, req, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put(self, req, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] data = self.process_put(req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put(self, req, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) data = self.process_delete(req, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. 
.api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 986a5351..d257572c 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,51 +1,92 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE from ...models import DepositRequest, Deposit +from rest_framework.permissions import AllowAny + +from swh.deposit.api.common import SWHAPIView +from swh.deposit.errors import make_error_dict, NOT_FOUND + class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ metadata = (m.metadata for m in self._deposit_requests( deposit, request_type=METADATA_TYPE)) return utils.merge(*metadata) + + +class SWHPrivateAPIView(SWHAPIView): + """Mixin intended as private api (so no authentication) based API view + (for the private ones). + + """ + authentication_classes = () + permission_classes = (AllowAny, ) + + def checks(self, req, collection_name, deposit_id=None): + """Override default checks implementation to allow empty collection. + + """ + if deposit_id: + try: + Deposit.objects.get(pk=deposit_id) + except Deposit.DoesNotExist: + return make_error_dict( + NOT_FOUND, + 'Deposit with id %s does not exist' % + deposit_id) + + headers = self._read_headers(req) + checks = self.additional_checks( + req, headers, collection_name, deposit_id) + if 'error' in checks: + return checks + + return {'headers': headers} + + def get(self, req, collection_name=None, deposit_id=None, format=None): + return super().get(req, collection_name, deposit_id, format) + + def put(self, req, collection_name=None, deposit_id=None, format=None): + return super().put(req, collection_name, deposit_id, format) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 4b170aad..8961d914 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,209 +1,209 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re import tarfile import zipfile from rest_framework import status -from . import DepositReadMixin -from ..common import SWHGetDepositAPI, SWHPrivateAPIView +from . 
import DepositReadMixin, SWHPrivateAPIView +from ..common import SWHGetDepositAPI from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE from ...models import Deposit MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable' # noqa MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)' # noqa MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported' MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' ARCHIVE_EXTENSIONS = [ 'zip', 'tar', 'tar.gz', 'xz', 'tar.xz', 'bz2', 'tar.bz2', 'Z', 'tar.Z', 'tgz', '7z' ] PATTERN_ARCHIVE_EXTENSION = re.compile( r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) -class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): +class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, { 'archive': [{ 'summary': MANDATORY_ARCHIVE_MISSING, }] } errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: errors.append({ 'summary': error_message, 'fields': [archive_request.id] }) if not errors: return True, None return False, { 'archive': errors } def _check_archive(self, archive_request): """Check that a deposit associated archive is ok: - readable - supported archive format - valid content: the archive does not contain a single archive file If any of those checks are not ok, return the corresponding failing check. Args: archive_path (DepositRequest): Archive to check Returns: (True, None) if archive is check compliant, (False, ) otherwise. """ archive_path = archive_request.archive.path try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: files = f.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] if PATTERN_ARCHIVE_EXTENSION.match(element): # archive in archive! return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. 
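# Usage sketch for _check_metadata() above; it mirrors the CheckMetadata tests
# further down in this patch, which mix SWHChecksDeposit into a TestCase
# instead of instantiating the view (we skip __init__ here for the same
# reason: _check_metadata() uses no instance state or configuration).
class _MetadataCheckOnly(SWHChecksDeposit):
    def __init__(self):
        pass

ok, detail = _MetadataCheckOnly()._check_metadata({
    'url': 'something',
    'external_identifier': 'something-else',
    'title': 'foo',       # 'name' or 'title' satisfies the alternate fields
    'author': 'someone',  # mandatory field
})
assert ok is True and detail is None
# Dropping 'author' would instead return
# (False, {'metadata': [{'summary': 'Mandatory fields are missing', ...}]}).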
""" required_fields = { 'author': False, } alternate_fields = { ('name', 'title'): False, # alternate field, at least one # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] optional_result = [ ' or '.join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: detail.append({ 'summary': MANDATORY_FIELDS_MISSING, 'fields': mandatory_result }) if optional_result != []: detail.append({ 'summary': ALTERNATE_FIELDS_MISSING, 'fields': optional_result, }) return False, { 'metadata': detail } def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) deposit_status = archives_status and metadata_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { 'status': deposit.status, 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/api/private/deposit_list.py b/swh/deposit/api/private/deposit_list.py index a03d5a1a..f3e3b1ad 100644 --- a/swh/deposit/api/private/deposit_list.py +++ b/swh/deposit/api/private/deposit_list.py @@ -1,48 +1,48 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.fields import _UnvalidatedField from rest_framework.generics import ListAPIView from rest_framework.pagination import PageNumberPagination from rest_framework import serializers -from ..common import SWHPrivateAPIView +from . import SWHPrivateAPIView from ..converters import convert_status_detail from ...models import Deposit class DefaultPagination(PageNumberPagination): page_size = 100 page_size_query_param = 'page_size' class StatusDetailField(_UnvalidatedField): """status_detail field is a dict, we want a simple message instead. So, we reuse the convert_status_detail from deposit_status endpoint to that effect. 
""" def to_representation(self, value): return convert_status_detail(value) class DepositSerializer(serializers.ModelSerializer): status_detail = StatusDetailField() class Meta: model = Deposit fields = '__all__' class DepositList(ListAPIView, SWHPrivateAPIView): """Deposit request class to list the deposit's status per page. HTTP verbs supported: GET """ queryset = Deposit.objects.all().order_by('id') serializer_class = DepositSerializer pagination_class = DefaultPagination diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 1df08f36..8b834b04 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,234 +1,234 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date from swh.deposit import utils -from . import DepositReadMixin +from . import DepositReadMixin, SWHPrivateAPIView from ...config import SWH_PERSON, ARCHIVE_TYPE -from ..common import SWHGetDepositAPI, SWHPrivateAPIView +from ..common import SWHGetDepositAPI from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) else: # only 1 archive, no need to do fancy actions (and no cleanup step) yield archive_paths[0] -class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView, +class SWHDepositReadArchives(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. 
Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = [r.archive.path for r in self._deposit_requests( deposit_id, request_type=ARCHIVE_TYPE)] with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') -class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView, +class SWHDepositReadMetadata(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client 'provider_type': 'deposit_client', 'metadata': {} }), 'tool': ('dict', { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } }) } def __init__(self): super().__init__() self.provider = self.config['provider'] self.tool = self.config['tool'] def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. Args: deposit (Deposit): Deposit model representation metadata (Dict): Metadata dict representation Returns: Tuple of author date, committer date. Those dates are swh normalized. """ commit_date = metadata.get('codemeta:datePublished') author_date = metadata.get('codemeta:dateCreated') if author_date and commit_date: pass elif commit_date: author_date = commit_date elif author_date: commit_date = author_date else: author_date = deposit.complete_date commit_date = deposit.complete_date return ( normalize_date(author_date), normalize_date(commit_date) ) def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. 
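# The date fallback implemented by _normalize_dates() above, in short
# (value is illustrative):
#   codemeta:dateCreated + codemeta:datePublished -> author and committer dates kept apart
#   only codemeta:datePublished                   -> reused as the author date too
#   only codemeta:dateCreated                     -> reused as the committer date too
#   neither                                       -> deposit.complete_date used for both
example_metadata = {'codemeta:dateCreated': '2017-10-07T15:17:08Z'}
# Here _normalize_dates(deposit, example_metadata) would return the same
# swh-normalized date (built by swh.deposit.utils.normalize_date) twice.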
""" metadata = self._metadata_get(deposit) # Read information metadata data = { 'origin': { 'type': 'deposit', 'url': utils.origin_url_from(deposit), } } # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider['provider_name'] = deposit.client.last_name self.provider['provider_url'] = deposit.client.provider_url revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) author_date, commit_date = self._normalize_dates(deposit, metadata) data['revision'] = { 'synthetic': True, 'date': author_date, 'committer_date': commit_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, 'message': revision_msg, 'metadata': metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id) parent_revision = persistent_identifier.object_id data['revision']['parents'] = [parent_revision] data['branch_name'] = 'master' data['origin_metadata'] = { 'provider': self.provider, 'tool': self.tool, 'metadata': metadata } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, 'application/json' diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 8b907a4c..208a49eb 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,89 +1,83 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.parsers import JSONParser from swh.model.identifiers import ( persistent_identifier, REVISION, DIRECTORY ) -from ..common import SWHPutDepositAPI, SWHPrivateAPIView +from . import SWHPrivateAPIView +from ..common import SWHPutDepositAPI from ...errors import make_error_dict, BAD_REQUEST from ...models import Deposit, DEPOSIT_STATUS_DETAIL from ...models import DEPOSIT_STATUS_LOAD_SUCCESS -class SWHUpdateStatusDeposit(SWHPutDepositAPI, SWHPrivateAPIView): +class SWHUpdateStatusDeposit(SWHPrivateAPIView, SWHPutDepositAPI): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ parser_classes = (JSONParser, ) def additional_checks(self, req, headers, collection_name, deposit_id=None): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists """ data = req.data status = data.get('status') if not status: msg = 'The status key is mandatory with possible values %s' % list( DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: msg = 'Possible status in %s' % list(DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status == DEPOSIT_STATUS_LOAD_SUCCESS: swh_id = data.get('revision_id') if not swh_id: msg = 'Updating status to %s requires a revision_id key' % ( status, ) return make_error_dict(BAD_REQUEST, msg) return {} - def restrict_access(self, req, deposit=None): - """Remove restriction modification to 'partial' deposit. - Update is possible regardless of the existing status. 
- - """ - return None - def process_put(self, req, headers, collection_name, deposit_id): """Update the deposit's status Returns: 204 No content """ deposit = Deposit.objects.get(pk=deposit_id) deposit.status = req.data['status'] # checks already done before origin_url = req.data.get('origin_url') dir_id = req.data.get('directory_id') if dir_id: deposit.swh_id = persistent_identifier(DIRECTORY, dir_id) deposit.swh_id_context = persistent_identifier( DIRECTORY, dir_id, metadata={'origin': origin_url}) rev_id = req.data.get('revision_id') if rev_id: deposit.swh_anchor_id = persistent_identifier( REVISION, rev_id) deposit.swh_anchor_id_context = persistent_identifier( REVISION, rev_id, metadata={'origin': origin_url}) deposit.save() return {} diff --git a/swh/deposit/api/private/urls.py b/swh/deposit/api/private/urls.py index 14335f25..f3a0363c 100644 --- a/swh/deposit/api/private/urls.py +++ b/swh/deposit/api/private/urls.py @@ -1,41 +1,62 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from ...config import ( PRIVATE_GET_RAW_CONTENT, PRIVATE_PUT_DEPOSIT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_CHECK_DEPOSIT, PRIVATE_LIST_DEPOSITS ) from .deposit_read import SWHDepositReadArchives from .deposit_read import SWHDepositReadMetadata from .deposit_update_status import SWHUpdateStatusDeposit from .deposit_check import SWHChecksDeposit from .deposit_list import DepositList urlpatterns = [ # Retrieve deposit's raw archives' content # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/raw/$', SWHDepositReadArchives.as_view(), name=PRIVATE_GET_RAW_CONTENT), # Update deposit's status # -> PUT url(r'^(?P[^/]+)/(?P[^/]+)/update/$', SWHUpdateStatusDeposit.as_view(), name=PRIVATE_PUT_DEPOSIT), # Retrieve metadata information on a specific deposit # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/meta/$', SWHDepositReadMetadata.as_view(), name=PRIVATE_GET_DEPOSIT_METADATA), # Check archive and metadata information on a specific deposit # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/check/$', SWHChecksDeposit.as_view(), name=PRIVATE_CHECK_DEPOSIT), + # Retrieve deposit's raw archives' content + # -> GET + url(r'^(?P[^/]+)/raw/$', + SWHDepositReadArchives.as_view(), + name=PRIVATE_GET_RAW_CONTENT+'-nc'), + # Update deposit's status + # -> PUT + url(r'^(?P[^/]+)/update/$', + SWHUpdateStatusDeposit.as_view(), + name=PRIVATE_PUT_DEPOSIT+'-nc'), + # Retrieve metadata information on a specific deposit + # -> GET + url(r'^(?P[^/]+)/meta/$', + SWHDepositReadMetadata.as_view(), + name=PRIVATE_GET_DEPOSIT_METADATA+'-nc'), + # Check archive and metadata information on a specific deposit + # -> GET + url(r'^(?P[^/]+)/check/$', + SWHChecksDeposit.as_view(), + name=PRIVATE_CHECK_DEPOSIT+'-nc'), + url(r'^deposits/$', DepositList.as_view(), name=PRIVATE_LIST_DEPOSITS) ] diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py index 680cb034..86d1d607 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -1,234 +1,236 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from django.urls import reverse import pytest from 
rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import ( DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED ) from swh.deposit.api.private.deposit_check import ( SWHChecksDeposit, MANDATORY_ARCHIVE_INVALID, MANDATORY_FIELDS_MISSING, MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING, MANDATORY_ARCHIVE_MISSING ) from swh.deposit.models import Deposit from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine @pytest.mark.fs class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. """ def setUp(self): super().setUp() + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit_id]) + def test_deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) def test_deposit_invalid_tarball(self): """Deposit with tarball (of 1 tarball) should fail the checks: rejected """ for archive_extension in ['zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz']: deposit_id = self.create_deposit_archive_with_archive( archive_extension) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_INVALID) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_deposit_ko_missing_tarball(self): """Deposit without archive should fail the checks: rejected """ deposit_id = self.create_deposit_ready() # no archive, only atom deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_MISSING) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_deposit_ko_unsupported_tarball(self): """Deposit with an unsupported tarball should fail the 
checks: rejected """ deposit_id = self.create_deposit_with_invalid_archive() deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_UNSUPPORTED) # metadata check failure self.assertEqual(len(details['metadata']), 2) mandatory = details['metadata'][0] self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING) self.assertEqual(set(mandatory['fields']), set(['author'])) alternate = details['metadata'][1] self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING) self.assertEqual(alternate['fields'], ['name or title']) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEqual(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) +@pytest.mark.fs +class CheckDepositTest2(CheckDepositTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_CHECK_DEPOSIT+'-nc', + args=[deposit_id]) + + class CheckMetadata(unittest.TestCase, SWHChecksDeposit): def test_check_metadata_ok(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) def test_check_metadata_ok2(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) def test_check_metadata_ko(self): """Missing optional field should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory alternate fields are missing', 'fields': ['name or title'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) def test_check_metadata_ko2(self): """Missing mandatory fields should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory fields are missing', 'fields': ['author'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) diff --git 
a/swh/deposit/tests/api/test_deposit_read_archive.py b/swh/deposit/tests/api/test_deposit_read_archive.py index 07d16c3f..b4ec2f41 100644 --- a/swh/deposit/tests/api/test_deposit_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_read_archive.py @@ -1,125 +1,98 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import os from django.urls import reverse import pytest from rest_framework import status from rest_framework.test import APITestCase from swh.core import tarball from swh.deposit.config import PRIVATE_GET_RAW_CONTENT from swh.deposit.tests import TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine, create_arborescence_archive @pytest.mark.fs class DepositReadArchivesTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() self.archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') self.workdir = os.path.join(self.root_path, 'workdir') + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_RAW_CONTENT, + args=[self.collection.name, deposit_id]) + def test_access_to_existing_deposit_with_one_archive(self): """Access to deposit should stream a 200 response with its raw content """ deposit_id = self.create_simple_binary_deposit() - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) r = self.client.get(url) self.assertEqual(r.status_code, status.HTTP_200_OK) self.assertEqual(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self.assertEqual(actual_sha1, self.archive['sha1sum']) # this does not touch the extraction dir so this should stay empty self.assertEqual(os.listdir(TEST_CONFIG['extraction_dir']), []) def _check_tarball_consistency(self, actual_sha1): tarball.uncompress(self.archive['path'], self.workdir) self.assertEqual(os.listdir(self.workdir), ['file1']) tarball.uncompress(self.archive2['path'], self.workdir) lst = set(os.listdir(self.workdir)) self.assertEqual(lst, {'file1', 'file2'}) new_path = self.workdir + '.zip' tarball.compress(new_path, 'zip', self.workdir) with open(new_path, 'rb') as f: h = hashlib.sha1(f.read()).hexdigest() self.assertEqual(actual_sha1, h) self.assertNotEqual(actual_sha1, self.archive['sha1sum']) self.assertNotEqual(actual_sha1, self.archive2['sha1sum']) def test_access_to_existing_deposit_with_multiple_archives(self): """Access to deposit should stream a 200 response with its raw contents """ deposit_id = self.create_complex_binary_deposit() - - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) r = self.client.get(url) self.assertEqual(r.status_code, status.HTTP_200_OK) self.assertEqual(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self._check_tarball_consistency(actual_sha1) # this touches the extraction directory but should clean up # after itself self.assertEqual(os.listdir(TEST_CONFIG['extraction_dir']), []) -class 
DepositReadArchivesFailureTest(APITestCase, WithAuthTestCase, - BasicTestCase, CommonCreationRoutine): - def test_access_to_nonexisting_deposit_returns_404_response(self): - """Read unknown collection should return a 404 response - - """ - unknown_id = '999' - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[self.collection.name, unknown_id]) - - response = self.client.get(url) - self.assertEqual(response.status_code, - status.HTTP_404_NOT_FOUND) - self.assertIn('Deposit with id %s does not exist' % unknown_id, - response.content.decode('utf-8')) - - def test_access_to_nonexisting_collection_returns_404_response(self): - """Read unknown deposit should return a 404 response - - """ - collection_name = 'non-existing' - deposit_id = self.create_deposit_partial() - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[collection_name, deposit_id]) - - response = self.client.get(url) - self.assertEqual(response.status_code, - status.HTTP_404_NOT_FOUND) - self.assertIn('Unknown collection name %s' % collection_name, - response.content.decode('utf-8')) +@pytest.mark.fs +class DepositReadArchivesTest2(DepositReadArchivesTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_RAW_CONTENT+'-nc', args=[deposit_id]) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py index 0e7e38aa..e4110a5a 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,661 +1,644 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from ...config import SWH_PERSON from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access to read metadata information on deposit. 
""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.template_metadata = """ Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter %s """ + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + def test_read_metadata(self): """Private metadata read api to existing deposit should return metadata """ deposit_id = self.create_deposit_partial() - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'committer_date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'type': 'tar' }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_read_metadata_revision_with_parent(self): """Private read metadata to a deposit (with parent) returns metadata """ swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' swh_persistent_id = 'swh:1:rev:%s' % swh_id deposit_id1 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id', swh_id=swh_persistent_id) deposit_parent = Deposit.objects.get(pk=deposit_id1) self.assertEqual(deposit_parent.swh_id, swh_persistent_id) self.assertEqual(deposit_parent.external_id, 'some-external-id') self.assertEqual(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id = self.create_deposit_partial( external_id='some-external-id') deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.external_id, 'some-external-id') self.assertEqual(deposit.swh_id, None) self.assertEqual(deposit.parent, deposit_parent) self.assertEqual(deposit.status, DEPOSIT_STATUS_PARTIAL) - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) + url = self.private_deposit_url(deposit_id) response = self.client.get(url) 
self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'committer_date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'parents': [swh_id] }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_read_metadata_3(self): """date(Created|Published) provided, uses author/committer date """ # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = self.template_metadata % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 """ deposit_id = self.create_deposit_partial_with_data_in_args( codemeta_entry_data) - - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_origin = { 'type': 'deposit', 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' } expected_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, 'client': 'hal', 'codemeta:applicationCategory': 'test', 'codemeta:author': { 'codemeta:name': 'Morane Gruenpeter' }, 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00', 'codemeta:datePublished': '2017-05-03T16:08:47+02:00', 'codemeta:description': 'this is the description', 'codemeta:developmentStatus': 'stable', 'codemeta:keywords': 'DSP programming', 'codemeta:license': [ { 'codemeta:name': 'GNU General Public License v3.0 only' }, { 'codemeta:name': 'CeCILL Free Software License Agreement v1.1' } ], 'codemeta:programmingLanguage': [ 'php', 'python', 'C' ], 'codemeta:runtimePlatform': 'phpstorm', 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa 'codemeta:version': '1', 'external_identifier': 'hal-01243065', 'id': 'hal-01243065', 'title': 'Composing a Web of Audio Applications' } expected_origin_metadata = { 'metadata': expected_metadata, 'provider': { 'metadata': {}, 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 
'https://hal-test.archives-ouvertes.fr/' }, 'tool': { 'configuration': { 'sword_version': '2' }, 'name': 'swh-deposit', 'version': '0.0.1' } } expected_revision = { 'author': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer_date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1493820527 } }, 'date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1428332927 } }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': expected_metadata, 'synthetic': True, 'type': 'tar' } expected_meta = { 'branch_name': 'master', 'origin': expected_origin, 'origin_metadata': expected_origin_metadata, 'revision': expected_revision, } self.assertEqual(data, expected_meta) def test_read_metadata_4(self): """dateCreated/datePublished not provided, revision uses complete_date """ codemeta_entry_data = self.template_metadata % '' deposit_id = self.create_deposit_partial_with_data_in_args( codemeta_entry_data) # will use the deposit completed date as fallback date deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = '2016-04-06' deposit.save() - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_origin = { 'type': 'deposit', 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' } expected_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, 'client': 'hal', 'codemeta:applicationCategory': 'test', 'codemeta:author': { 'codemeta:name': 'Morane Gruenpeter' }, 'codemeta:description': 'this is the description', 'codemeta:developmentStatus': 'stable', 'codemeta:keywords': 'DSP programming', 'codemeta:license': [ { 'codemeta:name': 'GNU General Public License v3.0 only' }, { 'codemeta:name': 'CeCILL Free Software License Agreement v1.1' } ], 'codemeta:programmingLanguage': [ 'php', 'python', 'C' ], 'codemeta:runtimePlatform': 'phpstorm', 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa 'codemeta:version': '1', 'external_identifier': 'hal-01243065', 'id': 'hal-01243065', 'title': 'Composing a Web of Audio Applications' } expected_origin_metadata = { 'metadata': expected_metadata, 'provider': { 'metadata': {}, 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/' }, 'tool': { 'configuration': { 'sword_version': '2' }, 'name': 'swh-deposit', 'version': '0.0.1' } } expected_revision = { 'author': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer_date': { 'negative_utc': False, 'offset': 0, 'timestamp': { 'microseconds': 0, 'seconds': 1459900800 } }, 'date': { 'negative_utc': False, 'offset': 0, 'timestamp': { 'microseconds': 0, 'seconds': 1459900800 } }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': expected_metadata, 'synthetic': True, 
'type': 'tar' } expected_meta = { 'branch_name': 'master', 'origin': expected_origin, 'origin_metadata': expected_origin_metadata, 'revision': expected_revision, } self.assertEqual(data, expected_meta) def test_read_metadata_5(self): """dateCreated/datePublished provided, revision uses author/committer date If multiple dateCreated provided, the first occurrence (of dateCreated) is selected. If multiple datePublished provided, the first occurrence (of datePublished) is selected. """ # add metadata to the deposit with multiple datePublished/dateCreated codemeta_entry_data = self.template_metadata % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 2016-04-06T17:08:47+02:00 2018-05-03T16:08:47+02:00 """ deposit_id = self.create_deposit_partial_with_data_in_args( codemeta_entry_data) - - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_origin = { 'type': 'deposit', 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' } expected_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, 'client': 'hal', 'codemeta:applicationCategory': 'test', 'codemeta:author': { 'codemeta:name': 'Morane Gruenpeter' }, 'codemeta:dateCreated': [ '2015-04-06T17:08:47+02:00', '2016-04-06T17:08:47+02:00', ], 'codemeta:datePublished': [ '2017-05-03T16:08:47+02:00', '2018-05-03T16:08:47+02:00', ], 'codemeta:description': 'this is the description', 'codemeta:developmentStatus': 'stable', 'codemeta:keywords': 'DSP programming', 'codemeta:license': [ { 'codemeta:name': 'GNU General Public License v3.0 only' }, { 'codemeta:name': 'CeCILL Free Software License Agreement v1.1' } ], 'codemeta:programmingLanguage': [ 'php', 'python', 'C' ], 'codemeta:runtimePlatform': 'phpstorm', 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa 'codemeta:version': '1', 'external_identifier': 'hal-01243065', 'id': 'hal-01243065', 'title': 'Composing a Web of Audio Applications' } expected_origin_metadata = { 'metadata': expected_metadata, 'provider': { 'metadata': {}, 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/' }, 'tool': { 'configuration': { 'sword_version': '2' }, 'name': 'swh-deposit', 'version': '0.0.1' } } expected_revision = { 'author': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer_date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1493820527 } }, 'date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1428332927 } }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': expected_metadata, 'synthetic': True, 'type': 'tar' } expected_meta = { 'branch_name': 'master', 'origin': expected_origin, 'origin_metadata': expected_origin_metadata, 'revision': expected_revision, } self.assertEqual(data, expected_meta) def test_access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' - 
url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, unknown_id]) - + url = self.private_deposit_url(unknown_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) - def test_access_to_nonexisting_collection_returns_404_response(self): - """Read unknown deposit should return a 404 response - """ - collection_name = 'non-existing' - deposit_id = self.create_deposit_partial() - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[collection_name, deposit_id]) - - response = self.client.get(url) - self.assertEqual(response.status_code, - status.HTTP_404_NOT_FOUND) - self.assertIn('Unknown collection name %s' % collection_name, - response.content.decode('utf-8'),) +class DepositReadMetadataTest2(DepositReadMetadataTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_DEPOSIT_METADATA+'-nc', + args=[deposit_id]) diff --git a/swh/deposit/tests/api/test_deposit_update_status.py b/swh/deposit/tests/api/test_deposit_update_status.py index d338ceab..7e6185f3 100644 --- a/swh/deposit/tests/api/test_deposit_update_status.py +++ b/swh/deposit/tests/api/test_deposit_update_status.py @@ -1,130 +1,134 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.config import PRIVATE_PUT_DEPOSIT, DEPOSIT_STATUS_VERIFIED from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from ..common import BasicTestCase class UpdateDepositStatusTest(APITestCase, BasicTestCase): """Update the deposit's status scenario """ def setUp(self): super().setUp() deposit = Deposit(status=DEPOSIT_STATUS_VERIFIED, collection=self.collection, client=self.user) deposit.save() self.deposit = Deposit.objects.get(pk=deposit.id) assert self.deposit.status == DEPOSIT_STATUS_VERIFIED + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_PUT_DEPOSIT, + args=[self.collection.name, deposit_id]) + def test_update_deposit_status(self): """Existing status for update should return a 204 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) possible_status = set(DEPOSIT_STATUS_DETAIL.keys()) - set( [DEPOSIT_STATUS_LOAD_SUCCESS]) for _status in possible_status: response = self.client.put( url, content_type='application/json', data=json.dumps({'status': _status})) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEqual(deposit.status, _status) def test_update_deposit_status_with_info(self): """Existing status for update with info should return a 204 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) expected_status = DEPOSIT_STATUS_LOAD_SUCCESS origin_url = 'something' directory_id = '42a13fc721c8716ff695d0d62fc851d641f3a12b' revision_id = '47dc6b4636c7f6cba0df83e3d5490bf4334d987e' expected_swh_id = 'swh:1:dir:%s' % directory_id expected_swh_id_context = 'swh:1:dir:%s;origin=%s' % ( 
directory_id, origin_url) expected_swh_anchor_id = 'swh:1:rev:%s' % revision_id expected_swh_anchor_id_context = 'swh:1:rev:%s;origin=%s' % ( revision_id, origin_url) response = self.client.put( url, content_type='application/json', data=json.dumps({ 'status': expected_status, 'revision_id': revision_id, 'directory_id': directory_id, 'origin_url': origin_url, })) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEqual(deposit.status, expected_status) self.assertEqual(deposit.swh_id, expected_swh_id) self.assertEqual(deposit.swh_id_context, expected_swh_id_context) self.assertEqual(deposit.swh_anchor_id, expected_swh_anchor_id) self.assertEqual(deposit.swh_anchor_id_context, expected_swh_anchor_id_context) def test_update_deposit_status_will_fail_with_unknown_status(self): """Unknown status for update should return a 400 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': 'unknown'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_will_fail_with_no_status_key(self): """No status provided for update should return a 400 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) response = self.client.put( url, content_type='application/json', data=json.dumps({'something': 'something'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_success_without_swh_id_fail(self): """Providing successful status without swh_id should return a 400 """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': DEPOSIT_STATUS_LOAD_SUCCESS})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + +class UpdateDepositStatusTest2(UpdateDepositStatusTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_PUT_DEPOSIT+'-nc', args=[deposit_id])
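
Note: the recurring change in this patch is a template-method hook for URL resolution: each private-API test class gains a private_deposit_url(deposit_id) method that builds the collection-scoped route, and a small subclass re-runs every inherited test against the new collection-less ('-nc') route by overriding that single method. The sketch below is not part of the patch; it only restates the pattern in isolation. PRIVATE_GET_RAW_CONTENT, self.collection and create_simple_binary_deposit are taken from the fixtures shown above and are assumed to be provided by the usual test mixins.

from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase

from swh.deposit.config import PRIVATE_GET_RAW_CONTENT


class PrivateRouteTestBase(APITestCase):
    """Exercise a private deposit endpoint through its collection-scoped URL."""

    def private_deposit_url(self, deposit_id):
        # collection-scoped variant, e.g. /{collection}/{deposit_id}/raw/
        return reverse(PRIVATE_GET_RAW_CONTENT,
                       args=[self.collection.name, deposit_id])

    def test_endpoint_is_reachable(self):
        # create_simple_binary_deposit / self.collection are assumed to come
        # from the project's test mixins (FileSystemCreationRoutine,
        # BasicTestCase), as in the tests above
        deposit_id = self.create_simple_binary_deposit()
        response = self.client.get(self.private_deposit_url(deposit_id))
        self.assertEqual(response.status_code, status.HTTP_200_OK)


class PrivateRouteNoCollectionTest(PrivateRouteTestBase):
    """Re-run the inherited tests against the collection-less route."""

    def private_deposit_url(self, deposit_id):
        # '-nc' variant: deposit id only, no collection name in the path
        return reverse(PRIVATE_GET_RAW_CONTENT + '-nc', args=[deposit_id])

Overriding only the URL builder keeps the assertions identical across both routings, so any behavioural difference between the two endpoints shows up as a plain test failure in the subclass.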
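
The CheckMetadata tests earlier in this patch pin down the metadata rules exercised through SWHChecksDeposit._check_metadata: 'url', 'external_identifier' and 'author' are mandatory, and at least one of 'name' / 'title' must be present. The function below is only an illustrative sketch of those rules as the tests express them, not the actual implementation.

def check_metadata(metadata):
    """Return (True, None) if metadata passes, else (False, error detail)."""
    required = ('url', 'external_identifier', 'author')
    alternates = ('name', 'title')

    problems = []
    missing = [field for field in required if field not in metadata]
    if missing:
        problems.append({'summary': 'Mandatory fields are missing',
                         'fields': missing})
    if not any(field in metadata for field in alternates):
        problems.append({'summary': 'Mandatory alternate fields are missing',
                         'fields': ['name or title']})

    if problems:
        return False, {'metadata': problems}
    return True, None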