diff --git a/bin/Makefile b/bin/Makefile index 6507624b..fe693dd8 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -1,33 +1,40 @@ DEPOSIT_ID=1 ARCHIVE=../../swh-deposit.zip ARCHIVE2=../../swh-model.zip STATUS=false PARTIAL_STATUS=true +UPDATE_STATUS='success' create-archives: 7z a $(ARCHIVE) $(FOLDER) 7z a $(ARCHIVE2) $(FOLDER2) new: ./create_deposit.sh $(ARCHIVE) $(STATUS) new-partial: make new STATUS=$(PARTIAL_STATUS) ARCHIVE=$(ARCHIVE) update: ./update-deposit-with-another-archive.sh $(DEPOSIT_ID) $(ARCHIVE_2) $(STATUS) update-partial: make update DEPOSIT_ID=$(DEPOSIT_ID) ARCHIVE2=$(ARCHIVE2) STATUS=$(PARTIAL_STATUS) replace: ./replace-deposit-archive.sh $(ARCHIVE2) $(DEPOSIT_ID) download: ./download-deposit-archive.sh $(DEPOSIT_ID) status: ./status.sh $(DEPOSIT_ID) service-document: ./service-document.sh + +home: + ./home.sh + +update-status: + ./update-status.sh $(DEPOSIT_ID) $(UPDATE_STATUS) diff --git a/bin/download-deposit-archive.sh b/bin/download-deposit-archive.sh index 50444b6c..2b875e31 100755 --- a/bin/download-deposit-archive.sh +++ b/bin/download-deposit-archive.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash . ./default-setup DEPOSIT_ID=${1-1} -curl -u "$CREDS" ${SERVER}/1/${COLLECTION}/${DEPOSIT_ID}/raw/ +curl ${SERVER}/1/${COLLECTION}/${DEPOSIT_ID}/raw/ diff --git a/bin/home.sh b/bin/home.sh new file mode 100755 index 00000000..d3cf2df4 --- /dev/null +++ b/bin/home.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +. ./default-setup + +curl ${SERVER} +echo diff --git a/bin/update-status.sh b/bin/update-status.sh new file mode 100755 index 00000000..5be1cafb --- /dev/null +++ b/bin/update-status.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +. ./default-setup + +DEPOSIT_ID=${1-1} +UPDATE_STATUS=${2-'success'} + +curl -i \ + -X PUT \ + -H 'Content-Type: application/json' \ + -d "{\"status\": \"${UPDATE_STATUS}\"}" \ + ${SERVER}/1/${COLLECTION}/${DEPOSIT_ID}/update/ diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 1f700e61..733996c5 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,825 +1,826 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from abc import ABCMeta, abstractmethod from django.core.urlresolvers import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.views import APIView -from rest_framework.authentication import BasicAuthentication, SessionAuthentication -from django.contrib.auth.middleware import AuthenticationMiddleware +from rest_framework.permissions import IsAuthenticated, AllowAny from ..config import SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI from ..config import ARCHIVE_KEY, METADATA_KEY from ..models import Deposit, DepositRequest, DepositCollection from ..models import DepositRequestType, DepositClient from ..parsers import parse_xml from ..errors import MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT from ..errors import CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED from ..errors import make_error_response_from_dict, FORBIDDEN from ..errors import NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] ACCEPT_CONTENT_TYPES = ['application/zip'] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ - authentication_classes = (BasicAuthentication, SessionAuthentication, ) + permission_classes = (IsAuthenticated, ) class SWHPrivateAPIView(SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ - authentication_classes = () + permission_classes = (AllowAny, ) class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def __init__(self): super().__init__() deposit_request_types = DepositRequestType.objects.all() self.deposit_request_types = { type.name: type for type in deposit_request_types } def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type content_length = meta.get('CONTENT_LENGTH') if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get('HTTP_IN_PROGRESS', False) content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') if isinstance(in_progress, str): in_progress = in_progress.lower() == 'true' content_md5sum = meta.get('HTTP_CONTENT_MD5') if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get('HTTP_PACKAGING') slug = meta.get('HTTP_SLUG') on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') return { 'content-type': content_type, 'content-length': content_length, 'in-progress': in_progress, 'content-disposition': content_disposition, 'content-md5sum': content_md5sum, 'packaging': packaging, 'slug': slug, 'on-behalf-of': on_behalf_of, 'metadata-relevant': metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put(self, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = 'ready' else: complete_date = None status_type = 'partial' if not deposit_id: deposit = Deposit(collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type deposit.save() return deposit def _deposit_request_put(self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False): """Save a deposit request with metadata attached to a deposit. Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types[METADATA_KEY]).delete() if replace_archives: DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types[ARCHIVE_KEY]).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=self.deposit_request_types[ARCHIVE_KEY], deposit=deposit, archive=archive_file) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: deposit_request = DepositRequest( type=self.deposit_request_types[METADATA_KEY], deposit=deposit, metadata=metadata) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types[ARCHIVE_KEY]).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) if deposit.collection.name != collection_name: summary = 'Cannot delete a deposit from another collection' description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config['max_upload_size']: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, 'Upload size limit exceeded (max %s bytes).' % self.config['max_upload_size'], 'Please consider sending the archive in ' 'multiple steps.') length = filehandler.size if length != content_length: return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, 'Wrong length') if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, 'Wrong md5 hash', 'The checksum sent %s and the actual checksum ' '%s does not match.' % (md5sum, _md5sum)) return None def _binary_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Binary upload routine. Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 403 (forbidden) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers['content-length'] if not content_length: return make_error_dict( BAD_REQUEST, 'CONTENT_LENGTH header is mandatory', 'For archive deposit, the ' 'CONTENT_LENGTH header must be sent.') content_disposition = headers['content-disposition'] if not content_disposition: return make_error_dict( BAD_REQUEST, 'CONTENT_DISPOSITION header is mandatory', 'For archive deposit, the ' 'CONTENT_DISPOSITION header must be sent.') packaging = headers['packaging'] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, 'Only packaging %s is supported' % ACCEPT_PACKAGINGS, 'The packaging provided %s is not supported' % packaging) filehandler = req.FILES['file'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum'], content_length) if precondition_status_response: return precondition_status_response external_id = headers['slug'] # actual storage of data archive_metadata = filehandler deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': filehandler.name, } def _multipart_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ external_id = headers['slug'] content_types_present = set() data = { 'application/zip': None, # expected archive 'application/atom+xml': None, } for key, value in req.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, 'Only 1 application/zip archive and 1 ' 'atom+xml entry is supported (as per sword2.0 ' 'specification)', 'You provided more than 1 application/zip ' 'or more than 1 application/atom+xml content-disposition ' 'header in the multipart deposit') content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, 'You must provide both 1 application/zip ' 'and 1 atom+xml entry for multipart deposit', 'You need to provide only 1 application/zip ' 'and 1 application/atom+xml content-disposition header ' 'in the multipart deposit') filehandler = data['application/zip'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum']) if precondition_status_response: return precondition_status_response # actual storage of data atom_metadata = parse_xml(data['application/atom+xml']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: atom_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': filehandler.name, } def _atom_entry(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Atom entry deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ if not req.data: return make_error_dict( BAD_REQUEST, 'Empty body request is not supported', 'Atom entry deposit is supposed to send for metadata. ' 'If the body is empty, there is no metadata.') external_id = req.data.get( '{http://www.w3.org/2005/Atom}external_identifier', headers['slug']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {METADATA_KEY: req.data}, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': None, } def _empty_post(self, req, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = 'ready' deposit.save() return { 'deposit_id': deposit_id, 'deposit_date': deposit.complete_date, 'archive': None, } def _make_iris(self, collection_name, deposit_id): """Define the IRI endpoints Args: collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ return { EM_IRI: reverse( EM_IRI, args=[collection_name, deposit_id]), EDIT_SE_IRI: reverse( EDIT_SE_IRI, args=[collection_name, deposit_id]), CONT_FILE_IRI: reverse( CONT_FILE_IRI, args=[collection_name, deposit_id]), } def additional_checks(self, req, collection_name, deposit_id=None): """Permit the child class to enrich with additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks(self, req, collection_name, deposit_id=None): try: self._collection = DepositCollection.objects.get( name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, 'Unknown collection name %s' % collection_name) - try: - username = req.user.username - self._client = DepositClient.objects.get(username=username) - except DepositClient.DoesNotExist: - return make_error_dict(NOT_FOUND, - 'Unknown client name %s' % username) - - if self._collection.id not in self._client.collections: - return make_error_dict(FORBIDDEN, - 'Client %s cannot access collection %s' % ( - username, collection_name)) + username = req.user.username + if username: # unauthenticated request can have the username empty + try: + self._client = DepositClient.objects.get(username=username) + except DepositClient.DoesNotExist: + return make_error_dict(NOT_FOUND, + 'Unknown client name %s' % username) + + if self._collection.id not in self._client.collections: + return make_error_dict( + FORBIDDEN, + 'Client %s cannot access collection %s' % ( + username, collection_name)) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'Deposit with id %s does not exist' % deposit_id) checks = self.restrict_access(req, deposit) if checks: return checks headers = self._read_headers(req) if headers['on-behalf-of']: return make_error_dict(MEDIATION_NOT_ALLOWED, 'Mediation is not supported.') checks = self.additional_checks(req, collection_name, deposit_id) if 'error' in checks: return checks return {'headers': headers} def restrict_access(self, req, deposit=None): if deposit: if req.method != 'GET' and deposit.status != 'partial': summary = "You can only act on deposit with status 'partial'" description = "This deposit has status '%s'" % deposit.status return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) def get(self, req, *args, **kwargs): return make_error_response(req, METHOD_NOT_ALLOWED) def post(self, req, *args, **kwargs): return make_error_response(req, METHOD_NOT_ALLOWED) def put(self, req, *args, **kwargs): return make_error_response(req, METHOD_NOT_ALLOWED) def delete(self, req, *args, **kwargs): return make_error_response(req, METHOD_NOT_ALLOWED) class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get(self, req, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) status, content, content_type = self.process_get( req, collection_name, deposit_id) return HttpResponse(content, status=status, content_type=content_type) @abstractmethod def process_get(self, req, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def post(self, req, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] _status, _iri_key, data = self.process_post( req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) data['packagings'] = ACCEPT_PACKAGINGS iris = self._make_iris(collection_name, data['deposit_id']) data.update(iris) response = render(req, 'deposit/deposit_receipt.xml', context=data, content_type='application/xml', status=_status) response._headers['location'] = 'Location', data[_iri_key] return response @abstractmethod def process_post(self, req, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put(self, req, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] data = self.process_put(req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put(self, req, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) data = self.process_delete(req, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 15052129..1a5bdca7 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,145 +1,145 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import tempfile from rest_framework import status from swh.loader.tar import tarball -from ..common import SWHGetDepositAPI +from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...models import Deposit, DepositRequest, TemporaryArchive def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive # from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit.scheduler-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # clean up temporary uncompressed tarball's on-disk content shutil.rmtree(aggregated_tarball_rootdir) # need to cleanup the temporary tarball when we are done directory_to_cleanup = dir_path else: # only 1 archive, no need to do fancy actions (and no cleanup step) temp_tarpath = archive_paths[0] directory_to_cleanup = None return directory_to_cleanup, temp_tarpath def stream_content(tarpath): """Stream a tarpath's content. Args: tarpath (path): Path to a tarball Raises: ValueError if the tarpath targets something nonexistent """ if not os.path.exists(tarpath): raise ValueError('Development error: %s should exist' % tarpath) with open(tarpath, 'rb') as f: for chunk in f: yield chunk -class SWHDepositReadArchives(SWHGetDepositAPI): +class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/') } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def retrieve_archives(self, deposit_id): """Given a deposit identifier, returns its associated archives' path. Yields: path to deposited archives """ deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']).order_by('id') for deposit_request in deposit_requests: yield deposit_request.archive.path def cleanup(self, directory_to_cleanup): """Reference the temporary directory holding the archive to be cleaned up. This actually does not clean up but add a reference for a directory to be cleaned up if it exists. Args: directory_to_cleanup (str/None): A reference to a directory to be cleaned up """ if directory_to_cleanup: # Add a temporary directory to be cleaned up in the db model # Another service is in charge of actually cleaning up if os.path.exists(directory_to_cleanup): tmp_archive = TemporaryArchive(path=directory_to_cleanup) tmp_archive.save() def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = list(self.retrieve_archives(deposit_id)) directory_to_cleanup, temp_tarpath = aggregate_tarballs( self.extraction_dir, archive_paths) stream = stream_content(temp_tarpath) self.cleanup(directory_to_cleanup) return status.HTTP_200_OK, stream, 'application/octet-stream' diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 03091a83..b1279a09 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,59 +1,59 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.parsers import JSONParser -from ..common import SWHPutDepositAPI +from ..common import SWHPutDepositAPI, SWHPrivateAPIView from ...errors import make_error_dict, BAD_REQUEST from ...models import Deposit, DEPOSIT_STATUS_DETAIL -class SWHUpdateStatusDeposit(SWHPutDepositAPI): +class SWHUpdateStatusDeposit(SWHPutDepositAPI, SWHPrivateAPIView): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ parser_classes = (JSONParser, ) def additional_checks(self, req, collection_name, deposit_id=None): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists """ status = req.data.get('status') if not status: msg = 'The status key is mandatory with possible values %s' % list( DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: msg = 'Possible status in %s' % list(DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) return {} def restrict_access(self, req, deposit=None): """Remove restriction modification to 'partial' deposit. Update is possible regardless of the existing status. """ return None def process_put(self, req, headers, collection_name, deposit_id): """Update the deposit's status Returns: 204 No content """ deposit = Deposit.objects.get(pk=deposit_id) deposit.status = req.data['status'] # checks already done before deposit.save() return {} diff --git a/swh/deposit/auth.py b/swh/deposit/auth.py index 65538c24..dbb5155e 100644 --- a/swh/deposit/auth.py +++ b/swh/deposit/auth.py @@ -1,83 +1,64 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import base64 +from rest_framework import status -from django.contrib.auth import authenticate - -from .config import SWHDefaultConfig from .errors import UNAUTHORIZED, make_error_response -def view_or_basicauth(view, request, test_func, realm="", *args, **kwargs): - """This determines if the request has already provided proper - http-authorization or not. If it did, returns the view. Otherwise, - respond with a 401. +def convert_response(request, content): + """Convert response from drf's basic authentication mechanism to a + swh-deposit one. + + Args: + request (Request): Use to build the response + content (bytes): The drf's answer - Note: Only basic realm is supported. + Returns: + + Response with the same status error as before, only the + body is now an swh-deposit compliant one. """ - if test_func(request.user): - # Already logged in, just return the view. - return view(request, *args, **kwargs) - - # They are not logged in. See if they provided login credentials - if 'HTTP_AUTHORIZATION' in request.META: - auth = request.META['HTTP_AUTHORIZATION'].split() - if len(auth) == 2: - # NOTE: Only support basic authentication - if auth[0].lower() == "basic": - authorization_token = base64.b64decode(auth[1]).decode('utf-8') - uname, passwd = authorization_token.split(':', 1) - user = authenticate(username=uname, password=passwd) - if user is not None and user.is_active: - request.user = user - if test_func(request.user): - return view(request, *args, **kwargs) - - # Either they did not provide an authorization header or - # something in the authorization attempt failed. Send a 401 - # back to them to ask them to authenticate. - response = make_error_response(request, UNAUTHORIZED, - 'Access to this api needs authentication') - response['WWW-Authenticate'] = 'Basic realm="%s"' % realm + from json import loads + + content = loads(content.decode('utf-8')) + detail = content.get('detail') + if detail: + verbose_description = 'API is protected by basic authentication' + else: + detail = 'API is protected by basic authentication' + verbose_description = None + + response = make_error_response( + request, + UNAUTHORIZED, + summary=detail, + verbose_description=verbose_description) + response['WWW-Authenticate'] = 'Basic realm=""' + return response -class HttpBasicAuthMiddleware(SWHDefaultConfig): - """Middleware to install or not the basic authentication layer - according to swh's yaml configuration. +class WrapBasicAuthenticationResponseMiddleware: + """Middleware to capture potential authentication error and convert + them to standard deposit response. - Note: white-list authentication is supported (cf. DEFAULT_CONFIG) + This is to be installed in django's settings.py module. """ - ADDITIONAL_CONFIG = { - 'authentication': ('dict', { - 'activated': 'true', - 'white-list': { - 'GET': ['/'], - } - }) - } - def __init__(self, get_response): super().__init__() self.get_response = get_response - self.auth = self.config['authentication'] - self.auth_activated = self.auth['activated'] - if self.auth_activated: - self.whitelist = self.auth.get('white-list', {}) def __call__(self, request): - if self.auth_activated: - whitelist = self.whitelist.get(request.method) - if whitelist and request.path in whitelist: - return self.get_response(request) - - r = view_or_basicauth(view=self.get_response, - request=request, - test_func=lambda u: u.is_authenticated()) - return r - return self.get_response(request) + response = self.get_response(request) + + if response.status_code is status.HTTP_401_UNAUTHORIZED: + content_type = response._headers.get('content-type') + if content_type == ('Content-Type', 'application/json'): + return convert_response(request, response.content) + + return response diff --git a/swh/deposit/settings/common.py b/swh/deposit/settings/common.py index ee321b60..ff2623e8 100644 --- a/swh/deposit/settings/common.py +++ b/swh/deposit/settings/common.py @@ -1,122 +1,121 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django settings for swh project. Generated by 'django-admin startproject' using Django 1.10.7. For more information on this file, see https://docs.djangoproject.com/en/1.10/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/1.10/ref/settings/ """ import os # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ ALLOWED_HOSTS = ['127.0.0.1', 'localhost'] # Application definition INSTALLED_APPS = [ 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.staticfiles', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.postgres', # for JSONField, ArrayField 'swh.deposit.apps.DepositConfig', ] MIDDLEWARE = [ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'swh.deposit.auth.HttpBasicAuthMiddleware', + 'swh.deposit.auth.WrapBasicAuthenticationResponseMiddleware', ] ROOT_URLCONF = 'swh.deposit.urls' TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', ], }, }, ] WSGI_APPLICATION = 'swh.deposit.wsgi.application' # Password validation # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa }, { 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa }, ] # Internationalization # https://docs.djangoproject.com/en/1.10/topics/i18n/ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'UTC' USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.10/howto/static-files/ STATIC_URL = '/static/' REST_FRAMEWORK = { 'DEFAULT_AUTHENTICATION_CLASSES': ( 'rest_framework.authentication.BasicAuthentication', - 'rest_framework.authentication.SessionAuthentication', - ) + ), } FILE_UPLOAD_HANDLERS = [ "django.core.files.uploadhandler.MemoryFileUploadHandler", "django.core.files.uploadhandler.TemporaryFileUploadHandler", ] diff --git a/swh/deposit/tests/api/test_deposit_update_status.py b/swh/deposit/tests/api/test_deposit_update_status.py index bb2c529a..c7823648 100644 --- a/swh/deposit/tests/api/test_deposit_update_status.py +++ b/swh/deposit/tests/api/test_deposit_update_status.py @@ -1,72 +1,74 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.core.urlresolvers import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.config import PRIVATE_PUT_DEPOSIT -from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine +from ..common import BasicTestCase -class UpdateDepositStatusTest(APITestCase, WithAuthTestCase, BasicTestCase, - CommonCreationRoutine): +class UpdateDepositStatusTest(APITestCase, BasicTestCase): """Update the deposit's status scenario """ def setUp(self): super().setUp() - deposit_id = self.create_deposit_ready() - self.deposit = Deposit.objects.get(pk=deposit_id) + deposit = Deposit(status='ready', + collection=self.collection, + client=self.user) + deposit.save() + self.deposit = Deposit.objects.get(pk=deposit.id) assert self.deposit.status == 'ready' def test_update_deposit_status(self): """Existing status for update should return a 204 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) for _status in DEPOSIT_STATUS_DETAIL.keys(): response = self.client.put( url, content_type='application/json', data=json.dumps({'status': _status})) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEquals(deposit.status, _status) def test_update_deposit_status_will_fail_with_unknown_status(self): """Unknown status for update should return a 400 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': 'unknown'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_will_fail_with_no_status_key(self): """No status provided for update should return a 400 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) response = self.client.put( url, content_type='application/json', data=json.dumps({'something': 'something'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index 16ee9891..ad3aafed 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,165 +1,165 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import base64 import hashlib import os import shutil from django.core.urlresolvers import reverse from django.test import TestCase from io import BytesIO from rest_framework import status from swh.deposit.config import COL_IRI, EM_IRI from swh.deposit.models import DepositClient, DepositCollection from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data deposit_request_types = {} # Add deposit request types for deposit_request_type in ['archive', 'metadata']: drt = DepositRequestType(name=deposit_request_type) drt.save() deposit_request_types[deposit_request_type] = drt _name = 'hal' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name) _client.collections = [_collection.id] _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name self.deposit_request_types = deposit_request_types def tearDown(self): # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): - """Mixin intended for login/logout automatically during setUp/tearDown - test method call. + """Mixin intended for testing the api with basic authentication. """ def setUp(self): super().setUp() - r = self.client.login(username=self.username, password=self.userpass) - if not r: - raise ValueError( - 'Dev error - test misconfiguration. Bad credentials provided!') + _token = '%s:%s' % (self.username, self.userpass) + token = base64.b64encode(_token.encode('utf-8')) + authorization = 'Basic %s' % token.decode('utf-8') + self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() - self.client.logout() + self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id """ def create_simple_deposit_partial(self): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.username]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_IN_PROGRESS='true') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `ready` (by default). Returns: deposit id """ # add an archive data_text = b'some content' md5sum = hashlib.md5(data_text).hexdigest() # when response = self.client.post( reverse(EM_IRI, args=[self.username, deposit_id]), content_type='application/zip', # as zip data=data_text, # + headers HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_LENGTH=len(data_text), HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self): """Create a complex deposit (2 requests) in status `ready`. """ deposit_id = self.create_simple_deposit_partial() deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial() deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id