diff --git a/PKG-INFO b/PKG-INFO index 259442ea..2d53f760 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.55 +Version: 0.0.56 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/docs/endpoints/status.rst b/docs/endpoints/status.rst index c6e4f664..ad8a0e74 100644 --- a/docs/endpoints/status.rst +++ b/docs/endpoints/status.rst @@ -1,29 +1,73 @@ Retrieve status ^^^^^^^^^^^^^^^^ .. http:get:: /1/// - Display deposit's status in regards to loading. - + Returns deposit's status. The different statuses: - **partial**: multipart deposit is still ongoing - - **deposited**: deposit completed + - **deposited**: deposit completed, ready for checks - **rejected**: deposit failed the checks - - **verified**: content and metadata verified + - **verified**: content and metadata verified, ready for loading - **loading**: loading in-progress - **done**: loading completed successfully - **failed**: the deposit loading has failed Also known as STATE-IRI :param text : the client's credentials :statuscode 201: with the deposit's status :statuscode 401: Unauthorized :statuscode 404: access to an unknown deposit +Rejected deposit +~~~~~~~~~~~~~~~~ + +A deposit may be rejected. In that case, the +`deposit_status_detail` entry explains which checks failed. 
+ +Many reasons are possible; here are some: + +- Deposit without software archive (main goal of the deposit is to + deposit software source code) + +- Deposit with malformed software archive (i.e. archive within archive) + +- Deposit with invalid software archive (corrupted archive, although + this one should happen during upload and not during checks) + +- Deposit with unsupported archive format + +- Deposit with missing metadata + Sample response ~~~~~~~~~~~~~~~ + + Successful deposit: + + .. code:: xml + + + 150 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:rev:c648730299c2a4f4df3c1fe6e527ef3681f9527e + + + Rejected deposit: + + .. code:: xml + + + 148 + rejected + - At least one url field must be compatible with the client's domain name (codemeta:url) + diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 259442ea..2d53f760 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.55 +Version: 0.0.56 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py index e4907607..52930a6d 100644 --- a/swh/deposit/api/deposit_status.py +++ b/swh/deposit/api/deposit_status.py @@ -1,107 +1,111 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from rest_framework import status from .common import SWHBaseDeposit from ..errors import NOT_FOUND, make_error_response from ..errors import make_error_response_from_dict from 
..models import DEPOSIT_STATUS_DETAIL, Deposit def convert_status_detail(status_detail): """Given a status_detail dict, transforms it into a human readable string. Dict has the following form (all first level keys are optional): { 'url': { 'summary': , 'fields': }, 'metadata': [{ 'summary': , 'fields': , }], - 'archive': { + 'archive': [{ 'summary': , - 'fields': [], - } + 'fields': , + }] } Args: status_detail (dict): Returns: Status detail as inlined string. """ if not status_detail: return None msg = [] - if 'metadata' in status_detail: - for data in status_detail['metadata']: - fields = ', '.join(data['fields']) - msg.append('- %s (%s)\n' % (data['summary'], fields)) - - for key in ['url', 'archive']: - if key in status_detail: - _detail = status_detail[key] - fields = _detail.get('fields') - suffix_msg = '' - if fields: - suffix_msg = ' (%s)' % ', '.join(fields) - msg.append('- %s%s\n' % (_detail['summary'], suffix_msg)) + for key in ['metadata', 'archive']: + _detail = status_detail.get(key) + if _detail: + for data in _detail: + suffix_msg = '' + fields = data.get('fields') + if fields: + suffix_msg = ' (%s)' % ', '.join(fields) + msg.append('- %s%s\n' % (data['summary'], suffix_msg)) + + _detail = status_detail.get('url') + if _detail: + fields = _detail.get('fields') + suffix_msg = '' + if fields: + suffix_msg = ' (%s)' % ', '.join(fields) + msg.append('- %s%s\n' % (_detail['summary'], suffix_msg)) if not msg: return None return ''.join(msg) class SWHDepositStatus(SWHBaseDeposit): """Deposit status. What's known as 'State IRI' in the sword specification. 
HTTP verbs supported: GET """ def get(self, req, collection_name, deposit_id, format=None): checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( req, NOT_FOUND, 'deposit %s does not belong to collection %s' % ( deposit_id, collection_name)) status_detail = convert_status_detail(deposit.status_detail) if not status_detail: status_detail = DEPOSIT_STATUS_DETAIL[deposit.status] context = { 'deposit_id': deposit.id, 'status': deposit.status, 'status_detail': status_detail, 'swh_id': None, } if deposit.swh_id: context['swh_id'] = deposit.swh_id return render(req, 'deposit/status.xml', context=context, content_type='application/xml', status=status.HTTP_200_OK) diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index e69de29b..f4acbba4 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -0,0 +1,51 @@ +# Copyright (C) 2017-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from ...config import METADATA_TYPE +from ...models import DepositRequest, Deposit + + +class DepositReadMixin: + """Deposit Read mixin + + """ + + def _deposit_requests(self, deposit, request_type): + """Given a deposit, yields its associated deposit_request + + Args: + deposit (Deposit): Deposit to list requests for + request_type (str): Archive or metadata type + + Yields: + deposit requests of type request_type associated to the deposit + + """ + if isinstance(deposit, int): + deposit = Deposit.objects.get(pk=deposit) + + deposit_requests = DepositRequest.objects.filter( + 
type=self.deposit_request_types[request_type], + deposit=deposit).order_by('id') + + for deposit_request in deposit_requests: + yield deposit_request + + def _metadata_get(self, deposit): + """Given a deposit, aggregate all metadata requests. + + Args: + deposit (Deposit): The deposit instance to extract + metadata from. + + Returns: + metadata dict from the deposit. + + """ + metadata = {} + for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): + metadata.update(dr.metadata) + return metadata diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 686b6558..cefcd926 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,249 +1,246 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json -import patoolib +import re +import tarfile +import zipfile from rest_framework import status +from . 
import DepositReadMixin from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED -from ...config import ARCHIVE_TYPE, METADATA_TYPE -from ...models import Deposit, DepositRequest - +from ...config import ARCHIVE_TYPE +from ...models import Deposit MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' - -MANDATORY_ARCHIVE_UNREADABLE = 'Deposit was rejected because at least one of its associated archives was not readable' # noqa -MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' INCOMPATIBLE_URL_FIELDS = "At least one url field must be compatible with the client's domain name" # noqa +MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable' # noqa +MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)' # noqa +MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported' +MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' +ARCHIVE_EXTENSIONS = [ + 'zip', 'tar', 'tar.gz', 'xz', 'tar.xz', 'bz2', + 'tar.bz2', 'Z', 'tar.Z', 'tgz', '7z' +] -class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): +PATTERN_ARCHIVE_EXTENSION = re.compile( + r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) + + +class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. 
""" - def _deposit_requests(self, deposit, request_type): - """Given a deposit, yields its associated deposit_request - - Args: - deposit (Deposit): Deposit to list requests for - request_type (str): Archive or metadata type - - Yields: - deposit requests of type request_type associated to the deposit - - """ - deposit_requests = DepositRequest.objects.filter( - type=self.deposit_request_types[request_type], - deposit=deposit).order_by('id') - - for deposit_request in deposit_requests: - yield deposit_request - def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, { - 'archive': { + 'archive': [{ 'summary': MANDATORY_ARCHIVE_MISSING, - } + }] } - rejected_dr_ids = [] - for dr in requests: - _path = dr.archive.path - check = self._check_archive(_path) + errors = [] + for archive_request in requests: + check, error_message = self._check_archive(archive_request) if not check: - rejected_dr_ids.append(dr.id) + errors.append({ + 'summary': error_message, + 'fields': [archive_request.id] + }) - if rejected_dr_ids: - return False, { - 'archive': { - 'summary': MANDATORY_ARCHIVE_UNREADABLE, - 'fields': rejected_dr_ids, - }} - return True, None + if not errors: + return True, None + return False, { + 'archive': errors + } + + def _check_archive(self, archive_request): + """Check that a deposit associated archive is ok: + - readable + - supported archive format + - valid content: the archive does not contain a single archive file - def _check_archive(self, archive_path): - """Check that a given archive is actually ok for reading. + If any of those checks are not ok, return the corresponding + failing check. 
Args: - archive_path (str): Archive to check + archive_path (DepositRequest): Archive to check Returns: - True if archive is successfully read, False otherwise. + (True, None) if archive is check compliant, (False, + ) otherwise. """ + archive_path = archive_request.archive.path try: - patoolib.test_archive(archive_path, verbosity=-1) + if zipfile.is_zipfile(archive_path): + with zipfile.ZipFile(archive_path) as f: + files = f.namelist() + elif tarfile.is_tarfile(archive_path): + with tarfile.open(archive_path) as f: + files = f.getnames() + else: + return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: - return False - else: - return True - - def _metadata_get(self, deposit): - """Given a deposit, aggregate all metadata requests. - - Args: - deposit (Deposit): The deposit instance to extract - metadata from. - - Returns: - metadata dict from the deposit. - - """ - metadata = {} - for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): - metadata.update(dr.metadata) - return metadata + return False, MANDATORY_ARCHIVE_UNREADABLE + if len(files) > 1: + return True, None + element = files[0] + if PATTERN_ARCHIVE_EXTENSION.match(element): + # archive in archive! + return False, MANDATORY_ARCHIVE_INVALID + return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. 
""" required_fields = { 'url': False, 'external_identifier': False, 'author': False, } alternate_fields = { ('name', 'title'): False, # alternate field, at least one - # of them must be present + # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] optional_result = [ ' or '.join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: detail.append({ 'summary': MANDATORY_FIELDS_MISSING, 'fields': mandatory_result }) if optional_result != []: detail.append({ 'summary': ALTERNATE_FIELDS_MISSING, 'fields': optional_result, }) return False, { 'metadata': detail } def _check_url(self, client_domain, metadata): """Check compatibility between client_domain and url field in metadata Args: client_domain (str): url associated with the deposit's client metadata (dict): Metadata where to find url Returns: tuple (status, error_detail): True, None if url associated with the deposit's client is ok, (False, ) otherwise. """ url_fields = [] for field in metadata: if 'url' in field: if client_domain in metadata[field]: return True, None url_fields.append(field) detail = { 'url': { 'summary': INCOMPATIBLE_URL_FIELDS, } } if url_fields: detail['url']['fields'] = url_fields return False, detail def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. 
Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) client_domain = deposit.client.domain metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) url_status, error_detail = self._check_url(client_domain, metadata) if not url_status: problems.update(error_detail) deposit_status = archives_status and metadata_status and url_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { 'status': deposit.status, 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index f145a9de..f34903af 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,237 +1,221 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers -from ...config import SWH_PERSON +from . 
import DepositReadMixin +from ...config import SWH_PERSON, ARCHIVE_TYPE from ..common import SWHGetDepositAPI, SWHPrivateAPIView -from ...models import Deposit, DepositRequest +from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive # from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) else: # only 1 archive, no need to do fancy actions (and no cleanup step) yield archive_paths[0] -class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView): +class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView, + DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. 
""" ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def retrieve_archives(self, deposit_id): """Given a deposit identifier, returns its associated archives' path. Yields: path to deposited archives """ - deposit = Deposit.objects.get(pk=deposit_id) - deposit_requests = DepositRequest.objects.filter( - deposit=deposit, - type=self.deposit_request_types['archive']).order_by('id') - + deposit_requests = self._deposit_requests( + deposit_id, request_type=ARCHIVE_TYPE) for deposit_request in deposit_requests: yield deposit_request.archive.path def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = list(self.retrieve_archives(deposit_id)) with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') -class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView): +class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView, + DepositReadMixin): """Class in charge of aggregating metadata on a deposit. 
- """ + """ ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client 'provider_type': 'deposit_client', 'metadata': {} }), 'tool': ('dict', { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } }) } def __init__(self): super().__init__() self.provider = self.config['provider'] self.tool = self.config['tool'] - def _aggregate_metadata(self, deposit, metadata_requests): - """Retrieve and aggregates metadata information. - - """ - metadata = {} - for req in metadata_requests: - metadata.update(req.metadata) - - return metadata - def _retrieve_url(self, deposit, metadata): client_domain = deposit.client.domain for field in metadata: if 'url' in field: if client_domain in metadata[field]: return metadata[field] - def aggregate(self, deposit, requests): - """Aggregate multiple data on deposit into one unified data dictionary. + def metadata_read(self, deposit): + """Read and aggregate multiple data on deposit into one unified data + dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. - requests ([DepositRequest]): List of associated requests which - need aggregation. Returns: Dictionary of data representing the deposit to inject in swh. 
""" data = {} - - # Retrieve tarballs/metadata information - metadata = self._aggregate_metadata(deposit, requests) + metadata = self._metadata_get(deposit) # create origin_url from metadata only after deposit_check validates it origin_url = self._retrieve_url(deposit, metadata) # Read information metadata data['origin'] = { 'type': 'deposit', 'url': origin_url } # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider['provider_name'] = deposit.client.last_name self.provider['provider_url'] = deposit.client.provider_url revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) complete_date = identifiers.normalize_timestamp(deposit.complete_date) data['revision'] = { 'synthetic': True, 'date': complete_date, 'committer_date': complete_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, 'message': revision_msg, 'metadata': metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id) parent_revision = persistent_identifier.object_id data['revision']['parents'] = [parent_revision] data['branch_name'] = 'master' data['origin_metadata'] = { 'provider': self.provider, 'tool': self.tool, 'metadata': metadata } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) - requests = DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata']) - - data = self.aggregate(deposit, requests) + data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, 'application/json' diff --git a/swh/deposit/models.py b/swh/deposit/models.py index 5d9bbadb..df8cfc9f 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -1,215 +1,217 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the 
AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated from: # cd swh_deposit && \ # python3 -m manage inspectdb from django.contrib.postgres.fields import JSONField, ArrayField from django.contrib.auth.models import User, UserManager from django.db import models from django.utils.timezone import now -from .config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED -from .config import DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS -from .config import DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_REJECTED +from .config import ( + DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, + DEPOSIT_STATUS_REJECTED +) class Dbversion(models.Model): """Db version """ version = models.IntegerField(primary_key=True) release = models.DateTimeField(default=now, null=True) description = models.TextField(blank=True, null=True) class Meta: db_table = 'dbversion' def __str__(self): return str({ 'version': self.version, 'release': self.release, 'description': self.description }) """Possible status""" DEPOSIT_STATUS = [ (DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_PARTIAL), ('expired', 'expired'), (DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED), (DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED), - ('rejected', 'rejected'), + (DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_REJECTED), ('loading', 'loading'), (DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS), (DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE), ] """Possible status and the detailed meaning.""" DEPOSIT_STATUS_DETAIL = { DEPOSIT_STATUS_PARTIAL: 'Deposit is partially received. 
To finalize it, ' 'In-Progress header should be false', 'expired': 'Deposit has been there too long and is now ' 'deemed ready to be garbage collected', DEPOSIT_STATUS_DEPOSITED: 'Deposit is ready for additional checks ' '(tarball ok, metadata, etc...)', DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and ' 'ready for loading', - 'rejected': 'Deposit failed the checks', + DEPOSIT_STATUS_REJECTED: 'Deposit failed the checks', 'loading': "Loading is ongoing on swh's side", DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully ' 'loaded into the Software Heritage archive', DEPOSIT_STATUS_LOAD_FAILURE: 'The deposit loading into the ' 'Software Heritage archive failed', } class DepositClient(User): """Deposit client """ collections = ArrayField(models.IntegerField(), null=True) objects = UserManager() provider_url = models.TextField(null=False) domain = models.TextField(null=False) class Meta: db_table = 'deposit_client' def __str__(self): return str({ 'id': self.id, 'collections': self.collections, 'username': super().username, 'domain': self.domain, 'provider_url': self.provider_url, }) class Deposit(models.Model): """Deposit reception table """ id = models.BigAutoField(primary_key=True) # First deposit reception date reception_date = models.DateTimeField(auto_now_add=True) # Date when the deposit is deemed complete and ready for loading complete_date = models.DateTimeField(null=True) # collection concerned by the deposit collection = models.ForeignKey( 'DepositCollection', models.DO_NOTHING) # Deposit's external identifier external_id = models.TextField() # Deposit client client = models.ForeignKey('DepositClient', models.DO_NOTHING) # SWH's loading result identifier swh_id = models.TextField(blank=True, null=True) # Deposit's status regarding loading status = models.TextField( choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) status_detail = JSONField(null=True) # deposit can have one parent parent = models.ForeignKey('self', null=True) 
class Meta: db_table = 'deposit' def __str__(self): d = { 'id': self.id, 'reception_date': self.reception_date, 'collection': self.collection.name, 'external_id': self.external_id, 'client': self.client.username, 'status': self.status, } if self.status in (DEPOSIT_STATUS_REJECTED): d['status_detail'] = self.status_detail return str(d) class DepositRequestType(models.Model): """Deposit request type made by clients (either archive or metadata) """ id = models.BigAutoField(primary_key=True) name = models.TextField() class Meta: db_table = 'deposit_request_type' def __str__(self): return str({'id': self.id, 'name': self.name}) def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ Args: instance (DepositRequest): DepositRequest concerned by the upload filename (str): Filename of the uploaded file Returns: A path to be prefixed by the MEDIA_ROOT to access physically to the file uploaded. """ return 'client_{0}/{1}'.format(instance.deposit.client.id, filename) class DepositRequest(models.Model): """Deposit request associated to one deposit. 
""" id = models.BigAutoField(primary_key=True) # Deposit concerned by the request deposit = models.ForeignKey(Deposit, models.DO_NOTHING) date = models.DateTimeField(auto_now_add=True) # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) type = models.ForeignKey( 'DepositRequestType', models.DO_NOTHING) class Meta: db_table = 'deposit_request' def __str__(self): meta = None if self.metadata: from json import dumps meta = dumps(self.metadata) archive_name = None if self.archive: archive_name = self.archive.name return str({ 'id': self.id, 'deposit': self.deposit, 'metadata': meta, 'archive': archive_name }) class DepositCollection(models.Model): id = models.BigAutoField(primary_key=True) # Human readable name for the collection type e.g HAL, arXiv, etc... name = models.TextField() class Meta: db_table = 'deposit_collection' def __str__(self): return str({'id': self.id, 'name': self.name}) diff --git a/swh/deposit/tests/api/test_deposit_binary.py b/swh/deposit/tests/api/test_deposit_binary.py index e28ba807..469f8e6e 100644 --- a/swh/deposit/tests/api/test_deposit_binary.py +++ b/swh/deposit/tests/api/test_deposit_binary.py @@ -1,652 +1,652 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.files.uploadedfile import InMemoryUploadedFile from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.tests import TEST_CONFIG from swh.deposit.config import COL_IRI, EM_IRI from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from 
swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml -from ..common import BasicTestCase, WithAuthTestCase, create_arborescence_zip +from ..common import BasicTestCase, WithAuthTestCase, create_arborescence_archive from ..common import FileSystemCreationRoutine class DepositTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine): """Try and upload one single deposit """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data1 = b""" hal urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data2 = b""" %s """ self.atom_entry_data_empty_body = b""" """ self.atom_entry_data3 = b""" something """ self.data_atom_entry_ok = b""" Title urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2005-10-07T17:17:08Z Contributor The abstract The abstract Access Rights Alternative Title Date Available Bibliographic Citation # noqa Contributor Description Has Part Has Version Identifier Is Part Of Publisher References Rights Holder Source Title Type """ @istest def post_deposit_binary_without_slug_header_is_bad_request(self): """Posting a binary deposit without slug header should return 400 """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, 
content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @istest def post_deposit_binary_upload_final_and_status_check(self): """Binary upload with correct headers should return 201 with receipt """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then response_content = parse_xml(BytesIO(response.content)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, self.archive['name']) response_content = parse_xml(BytesIO(response.content)) self.assertEqual(response_content['deposit_archive'], self.archive['name']) self.assertEqual(int(response_content['deposit_id']), deposit.id) 
self.assertEqual(response_content['deposit_status'], deposit.status) edit_se_iri = reverse('edit_se_iri', args=[self.collection.name, deposit.id]) self.assertEqual(response._headers['location'], ('Location', 'http://testserver' + edit_se_iri)) @istest def post_deposit_binary_upload_supports_zip_or_tar(self): """Binary upload with content-type not in [zip,x-tar] should return 415 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/octet-stream', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_binary_fails_if_unsupported_packaging_header( self): """Bin deposit without supported content_disposition header returns 400 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='something-unsupported', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_binary_upload_fail_if_no_content_disposition_header( self): """Binary upload without content_disposition header should return 400 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 
'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_mediation_not_supported(self): """Binary upload with mediation should return a 412 response """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_ON_BEHALF_OF='someone', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_412_PRECONDITION_FAILED) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_binary_upload_fail_if_upload_size_limit_exceeded( self): """Binary upload must not exceed the limit set up... 
""" # given url = reverse(COL_IRI, args=[self.collection.name]) - archive = create_arborescence_zip( + archive = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some content in file', up_to_size=TEST_CONFIG['max_upload_size']) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=archive['data'], # + headers CONTENT_LENGTH=archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_413_REQUEST_ENTITY_TOO_LARGE) self.assertRegex(response.content, b'Upload size limit exceeded') with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_2_post_2_different_deposits(self): """2 posting deposits should return 2 different 201 with receipt """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG='some-external-id-1', HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) deposits = Deposit.objects.all() self.assertEqual(len(deposits), 1) self.assertEqual(deposits[0], deposit) # second post response = self.client.post( url, content_type='application/x-tar', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG='another-external-id', HTTP_CONTENT_MD5=self.archive['md5sum'], 
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename1') self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id2 = response_content['deposit_id'] deposit2 = Deposit.objects.get(pk=deposit_id2) self.assertNotEqual(deposit, deposit2) deposits = Deposit.objects.all().order_by('id') self.assertEqual(len(deposits), 2) self.assertEqual(list(deposits), [deposit, deposit2]) @istest def post_deposit_binary_and_post_to_add_another_archive(self): """Updating a deposit should return a 201 with receipt """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='true', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertEquals(deposit_request.type.name, 'archive') self.assertRegex(deposit_request.archive.name, self.archive['name']) # 2nd archive to upload - archive2 = create_arborescence_zip( + archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in 
file') # uri to update the content update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) # adding another archive for the deposit and finalizing it response = self.client.post( update_uri, content_type='application/zip', # as zip data=archive2['data'], # + headers CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( archive2['name'])) self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = list(DepositRequest.objects.filter(deposit=deposit). order_by('id')) # 2 deposit requests for the same deposit self.assertEquals(len(deposit_requests), 2) self.assertEquals(deposit_requests[0].deposit, deposit) self.assertEquals(deposit_requests[0].type.name, 'archive') self.assertRegex(deposit_requests[0].archive.name, self.archive['name']) self.assertEquals(deposit_requests[1].deposit, deposit) self.assertEquals(deposit_requests[1].type.name, 'archive') self.assertRegex(deposit_requests[1].archive.name, archive2['name']) # only 1 deposit in db deposits = Deposit.objects.all() self.assertEqual(len(deposits), 1) @istest def post_deposit_then_post_or_put_is_refused_when_status_ready(self): """Updating a deposit with status 'ready' should return a 400 """ url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, 
HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, 'filename0') # updating/adding is forbidden # uri to update the content edit_se_iri = reverse( 'edit_se_iri', args=[self.collection.name, deposit_id]) em_iri = reverse( 'em_iri', args=[self.collection.name, deposit_id]) # Testing all update/add endpoint should fail # since the status is ready - archive2 = create_arborescence_zip( + archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some content in file 2') # replacing file is no longer possible since the deposit's # status is ready r = self.client.put( em_iri, content_type='application/zip', data=archive2['data'], CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # adding file is no longer possible since the deposit's status # is ready r = self.client.post( em_iri, content_type='application/zip', data=archive2['data'], CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], 
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # replacing metadata is no longer possible since the deposit's # status is ready r = self.client.put( edit_se_iri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_ok, CONTENT_LENGTH=len(self.data_atom_entry_ok), HTTP_SLUG=external_id) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # adding new metadata is no longer possible since the # deposit's status is ready r = self.client.post( edit_se_iri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_ok, CONTENT_LENGTH=len(self.data_atom_entry_ok), HTTP_SLUG=external_id) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) archive_content = b'some content representing archive' archive = InMemoryUploadedFile( BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/zip', size=len(archive_content), charset=None) atom_entry = InMemoryUploadedFile( BytesIO(self.data_atom_entry_ok), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(self.data_atom_entry_ok), charset='utf-8') # replacing multipart metadata is no longer possible since the # deposit's status is ready r = self.client.put( edit_se_iri, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # adding new metadata is no longer possible since the # deposit's status is ready r = self.client.post( edit_se_iri, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py index 46768ce2..e2d15831 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ 
b/swh/deposit/tests/api/test_deposit_check.py @@ -1,192 +1,247 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import unittest from django.core.urlresolvers import reverse from nose.tools import istest from nose.plugins.attrib import attr from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import ( DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED ) from swh.deposit.api.private.deposit_check import ( - SWHChecksDeposit, + SWHChecksDeposit, MANDATORY_ARCHIVE_INVALID, MANDATORY_FIELDS_MISSING, INCOMPATIBLE_URL_FIELDS, - MANDATORY_ARCHIVE_UNREADABLE, ALTERNATE_FIELDS_MISSING + MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING, + MANDATORY_ARCHIVE_MISSING ) from swh.deposit.models import Deposit from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine @attr('fs') class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. 
""" def setUp(self): super().setUp() @istest def deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) @istest - def deposit_ko(self): - """Invalid deposit should fail the checks (-> status rejected) + def deposit_invalid_tarball(self): + """Deposit with tarball (of 1 tarball) should fail the checks: rejected + + """ + for archive_extension in ['zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz']: + deposit_id = self.create_deposit_archive_with_archive( + archive_extension) + + deposit = Deposit.objects.get(pk=deposit_id) + self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status) + + url = reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit.id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) + details = data['details'] + # archive checks failure + self.assertEqual(len(details['archive']), 1) + self.assertEqual(details['archive'][0]['summary'], + MANDATORY_ARCHIVE_INVALID) + + deposit = Deposit.objects.get(pk=deposit.id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) + + @istest + def deposit_ko_missing_tarball(self): + """Deposit without archive should fail the checks: rejected + + """ + deposit_id = self.create_deposit_ready() # 
no archive, only atom + deposit = Deposit.objects.get(pk=deposit_id) + self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status) + + url = reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit.id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) + details = data['details'] + # archive checks failure + self.assertEqual(len(details['archive']), 1) + self.assertEqual(details['archive'][0]['summary'], + MANDATORY_ARCHIVE_MISSING) + deposit = Deposit.objects.get(pk=deposit.id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) + + @istest + def deposit_ko_unsupported_tarball(self): + """Deposit with an unsupported tarball should fail the checks: rejected """ deposit_id = self.create_deposit_with_invalid_archive() deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure - self.assertEqual(len(details['archive']['fields']), 1) - self.assertEqual(details['archive']['summary'], - MANDATORY_ARCHIVE_UNREADABLE) + self.assertEqual(len(details['archive']), 1) + self.assertEqual(details['archive'][0]['summary'], + MANDATORY_ARCHIVE_UNSUPPORTED) # metadata check failure self.assertEqual(len(details['metadata']), 2) mandatory = details['metadata'][0] self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING) self.assertEqual(set(mandatory['fields']), set(['url', 'external_identifier', 'author'])) alternate = details['metadata'][1] self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING) 
self.assertEqual(alternate['fields'], ['name or title']) # url check failure self.assertEqual(details['url']['summary'], INCOMPATIBLE_URL_FIELDS) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) @istest def check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEquals(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) class CheckMetadata(unittest.TestCase, SWHChecksDeposit): @istest def check_metadata_ok(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) @istest def check_metadata_ok2(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) @istest def check_metadata_ko(self): """Missing optional field should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory alternate fields are missing', 'fields': ['name or title'], }] } 
self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) @istest def check_metadata_ko2(self): """Missing mandatory fields should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory fields are missing', 'fields': ['author'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) diff --git a/swh/deposit/tests/api/test_deposit_read_archive.py b/swh/deposit/tests/api/test_deposit_read_archive.py index b6284308..21c4bf90 100644 --- a/swh/deposit/tests/api/test_deposit_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_read_archive.py @@ -1,130 +1,130 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import os from django.core.urlresolvers import reverse from nose.tools import istest from nose.plugins.attrib import attr from rest_framework import status from rest_framework.test import APITestCase from swh.core import tarball from swh.deposit.config import PRIVATE_GET_RAW_CONTENT from swh.deposit.tests import TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine -from ..common import FileSystemCreationRoutine, create_arborescence_zip +from ..common import FileSystemCreationRoutine, create_arborescence_archive @attr('fs') class DepositReadArchivesTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() - self.archive2 = create_arborescence_zip( + self.archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') self.workdir = os.path.join(self.root_path, 'workdir') @istest def 
access_to_existing_deposit_with_one_archive(self): """Access to deposit should stream a 200 response with its raw content """ deposit_id = self.create_simple_binary_deposit() url = reverse(PRIVATE_GET_RAW_CONTENT, args=[self.collection.name, deposit_id]) r = self.client.get(url) self.assertEquals(r.status_code, status.HTTP_200_OK) self.assertEquals(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self.assertEquals(actual_sha1, self.archive['sha1sum']) # this does not touch the extraction dir so this should stay empty self.assertEquals(os.listdir(TEST_CONFIG['extraction_dir']), []) def _check_tarball_consistency(self, actual_sha1): tarball.uncompress(self.archive['path'], self.workdir) self.assertEquals(os.listdir(self.workdir), ['file1']) tarball.uncompress(self.archive2['path'], self.workdir) lst = set(os.listdir(self.workdir)) self.assertEquals(lst, {'file1', 'file2'}) new_path = self.workdir + '.zip' tarball.compress(new_path, 'zip', self.workdir) with open(new_path, 'rb') as f: h = hashlib.sha1(f.read()).hexdigest() self.assertEqual(actual_sha1, h) self.assertNotEqual(actual_sha1, self.archive['sha1sum']) self.assertNotEqual(actual_sha1, self.archive2['sha1sum']) @istest def access_to_existing_deposit_with_multiple_archives(self): """Access to deposit should stream a 200 response with its raw contents """ deposit_id = self.create_complex_binary_deposit() url = reverse(PRIVATE_GET_RAW_CONTENT, args=[self.collection.name, deposit_id]) r = self.client.get(url) self.assertEquals(r.status_code, status.HTTP_200_OK) self.assertEquals(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self._check_tarball_consistency(actual_sha1) # this touches the extraction directory but should clean up # after itself 
self.assertEquals(os.listdir(TEST_CONFIG['extraction_dir']), []) class DepositReadArchivesFailureTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): @istest def access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' url = reverse(PRIVATE_GET_RAW_CONTENT, args=[self.collection.name, unknown_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) @istest def access_to_nonexisting_collection_returns_404_response(self): """Read unknown deposit should return a 404 response """ collection_name = 'non-existing' deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_RAW_CONTENT, args=[collection_name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Unknown collection name %s' % collection_name, response.content.decode('utf-8')) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py index 2260e789..2fe4ec6d 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,207 +1,211 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.core.urlresolvers import reverse from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from 
...config import SWH_PERSON from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access to read metadata information on deposit. """ @istest def read_metadata(self): """Private metadata read api to existing deposit should return metadata """ deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEquals(response._headers['content-type'][1], 'application/json') data = json.loads(response.content.decode('utf-8')) expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier':'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'tool_name': 'swh-deposit', 'tool_version': '0.0.1', 'tool_configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'committer_date': None, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'date': None, 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'type': 'tar' }, 'branch_name': 'master', } self.assertEquals(data, expected_meta) @istest def read_metadata_revision_with_parent(self): """Private read metadata to a deposit (with parent) returns metadata """ swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' swh_persistent_id = 
'swh:1:rev:%s' % swh_id deposit_id1 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id', swh_id=swh_persistent_id) deposit_parent = Deposit.objects.get(pk=deposit_id1) self.assertEquals(deposit_parent.swh_id, swh_persistent_id) self.assertEquals(deposit_parent.external_id, 'some-external-id') self.assertEquals(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id = self.create_deposit_partial( external_id='some-external-id') deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.external_id, 'some-external-id') self.assertEquals(deposit.swh_id, None) self.assertEquals(deposit.parent, deposit_parent) self.assertEquals(deposit.status, DEPOSIT_STATUS_PARTIAL) url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEquals(response._headers['content-type'][1], 'application/json') data = json.loads(response.content.decode('utf-8')) expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'tool_name': 'swh-deposit', 'tool_version': '0.0.1', 'tool_configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'date': None, 'committer_date': None, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier': 'some-external-id', 'url': 
'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'parents': [swh_id] }, 'branch_name': 'master', } self.assertEquals(data, expected_meta) @istest def access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, unknown_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) @istest def access_to_nonexisting_collection_returns_404_response(self): """Read unknown deposit should return a 404 response """ collection_name = 'non-existing' deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection_name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Unknown collection name %s' % collection_name, response.content.decode('utf-8'),) diff --git a/swh/deposit/tests/api/test_deposit_status.py b/swh/deposit/tests/api/test_deposit_status.py index 180eacb8..ced39896 100644 --- a/swh/deposit/tests/api/test_deposit_status.py +++ b/swh/deposit/tests/api/test_deposit_status.py @@ -1,218 +1,230 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.api.deposit_status import convert_status_detail from swh.deposit.config import (COL_IRI, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED) from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from 
swh.deposit.models import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, FileSystemCreationRoutine from ..common import CommonCreationRoutine class DepositStatusTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Status on deposit """ @istest def post_deposit_with_status_check(self): """Binary upload should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) deposit = Deposit.objects.get(external_id=external_id) status_url = reverse(STATE_IRI, args=[self.collection.name, deposit.id]) # check status status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit.id) self.assertEqual(r['deposit_status'], DEPOSIT_STATUS_DEPOSITED) self.assertEqual(r['deposit_status_detail'], DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED]) @istest def status_with_swh_id(self): _status = DEPOSIT_STATUS_LOAD_SUCCESS _swh_id = '548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), 
deposit_id) self.assertEqual(r['deposit_status'], _status) self.assertEqual(r['deposit_status_detail'], DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS]) self.assertEqual(r['deposit_swh_id'], _swh_id) @istest def status_on_unknown_deposit(self): """Asking for the status of unknown deposit returns 404 response""" status_url = reverse(STATE_IRI, args=[self.collection.name, 999]) status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_404_NOT_FOUND) @istest def status_with_http_accept_header_should_not_break(self): """Asking deposit status with Accept header should return 200 """ deposit_id = self.create_deposit_partial() status_url = reverse(STATE_IRI, args=[ self.collection.name, deposit_id]) response = self.client.get( status_url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertEqual(response.status_code, status.HTTP_200_OK) @istest def convert_status_detail_empty(self): actual_status_detail = convert_status_detail({}) self.assertIsNone(actual_status_detail) actual_status_detail = convert_status_detail({'dummy-keys': []}) self.assertIsNone(actual_status_detail) actual_status_detail = convert_status_detail(None) self.assertIsNone(actual_status_detail) @istest def convert_status_detail(self): status_detail = { 'url': { 'summary': "At least one url field must be compatible with the client\'s domain name. 
The following url fields failed the check", # noqa 'fields': ['blahurl', 'testurl'], }, 'metadata': [ { 'summary': 'Mandatory fields missing', 'fields': ['url', 'title'], }, { 'summary': 'Alternate fields missing', 'fields': ['name or title', 'url or badurl'] } ], - 'archive': { + 'archive': [{ 'summary': 'Unreadable archive', - 'fields': ['1', '2'], - }, + 'fields': ['1'], + }], } expected_status_detail = '''- Mandatory fields missing (url, title) - Alternate fields missing (name or title, url or badurl) +- Unreadable archive (1) - At least one url field must be compatible with the client's domain name. The following url fields failed the check (blahurl, testurl) -- Unreadable archive (1, 2) ''' # noqa actual_status_detail = convert_status_detail(status_detail) self.assertEqual(actual_status_detail, expected_status_detail) @istest def convert_status_detail_2(self): status_detail = { 'url': { 'summary': 'At least one compatible url field. Failed', 'fields': ['testurl'], }, 'metadata': [ { 'summary': 'Mandatory fields missing', 'fields': ['name'], }, ], + 'archive': [ + { + 'summary': 'Invalid archive', + 'fields': ['2'], + }, + { + 'summary': 'Unsupported archive', + 'fields': ['1'], + } + ], } expected_status_detail = '''- Mandatory fields missing (name) +- Invalid archive (2) +- Unsupported archive (1) - At least one compatible url field. 
Failed (testurl) ''' actual_status_detail = convert_status_detail(status_detail) self.assertEqual(actual_status_detail, expected_status_detail) @istest def convert_status_detail_3(self): status_detail = { 'url': { 'summary': 'At least one compatible url field', }, } expected_status_detail = '- At least one compatible url field\n' actual_status_detail = convert_status_detail(status_detail) self.assertEqual(actual_status_detail, expected_status_detail) @istest def status_on_deposit_rejected(self): _status = DEPOSIT_STATUS_REJECTED _swh_id = '548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' _status_detail = {'url': {'summary': 'Wrong url'}} # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id, status_detail=_status_detail) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit_id) self.assertEqual(r['deposit_status'], _status) self.assertEqual(r['deposit_status_detail'], '- Wrong url') self.assertEqual(r['deposit_swh_id'], _swh_id) diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py index 3f227ecc..22478011 100644 --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update.py @@ -1,345 +1,345 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DepositRequest from swh.deposit.config import EDIT_SE_IRI, EM_IRI from ..common import BasicTestCase, 
WithAuthTestCase, CommonCreationRoutine -from ..common import FileSystemCreationRoutine, create_arborescence_zip +from ..common import FileSystemCreationRoutine, create_arborescence_archive class DepositUpdateOrReplaceExistingDataTest( APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Try put/post (update/replace) query on EM_IRI """ def setUp(self): super().setUp() self.atom_entry_data1 = b""" bar """ self.atom_entry_data1 = b""" bar """ - self.archive2 = create_arborescence_zip( + self.archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') @istest def replace_archive_to_deposit_is_possible(self): """Replace all archive with another one should return a 204 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']) assert len(list(requests)) == 1 assert self.archive['name'] in requests[0].archive.name # we have no metadata for that deposit requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) assert len(requests) == 0 deposit_id = self._update_deposit_with_status(deposit_id, status_partial=True) requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) assert len(requests) == 1 update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) external_id = 'some-external-id-1' response = self.client.put( update_uri, content_type='application/zip', # as zip data=self.archive2['data'], # + headers CONTENT_LENGTH=self.archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive2['name'], )) 
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']) self.assertEquals(len(list(requests)), 1) self.assertRegex(requests[0].archive.name, self.archive2['name']) # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) self.assertEquals(len(requests), 1) @istest def replace_metadata_to_deposit_is_possible(self): """Replace all metadata with another one should return a 204 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) assert len(list(requests)) == 0 requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) assert len(requests) == 1 update_uri = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.put( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data1) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) self.assertEquals(len(list(requests)), 1) metadata = requests[0].metadata self.assertEquals(metadata['foobar'], 'bar') # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) self.assertEquals(len(requests), 1) @istest def add_archive_to_deposit_is_possible(self): """Add another archive to a deposit return a 201 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']) 
assert len(list(requests)) == 1 assert self.archive['name'] in requests[0].archive.name requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) assert len(requests) == 0 update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) external_id = 'some-external-id-1' response = self.client.post( update_uri, content_type='application/zip', # as zip data=self.archive2['data'], # + headers CONTENT_LENGTH=self.archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive2['name'],)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']).order_by('id')) self.assertEquals(len(requests), 2) # first archive still exists self.assertRegex(requests[0].archive.name, self.archive['name']) # a new one was added self.assertRegex(requests[1].archive.name, self.archive2['name']) # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) self.assertEquals(len(requests), 0) @istest def add_metadata_to_deposit_is_possible(self): """Add metadata with another one should return a 204 response """ # given deposit_id = self.create_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) assert len(list(requests)) == 2 requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) assert len(requests) == 0 update_uri = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.post( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data1) 
self.assertEqual(response.status_code, status.HTTP_201_CREATED) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']).order_by('id') self.assertEquals(len(list(requests)), 3) # a new one was added self.assertEquals(requests[1].metadata['foobar'], 'bar') # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) self.assertEquals(len(requests), 0) class DepositUpdateFailuresTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Failure scenario about add/replace (post/put) query on deposit. """ @istest def add_metadata_to_unknown_collection(self): """Replacing metadata to unknown deposit should return a 404 response """ url = reverse(EDIT_SE_IRI, args=['test', 1000]) response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Unknown collection name test') @istest def add_metadata_to_unknown_deposit(self): """Replacing metadata to unknown deposit should return a 404 response """ url = reverse(EDIT_SE_IRI, args=[self.collection.name, 999]) response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 999 does not exist') @istest def replace_metadata_to_unknown_deposit(self): """Adding metadata to unknown deposit should return a 404 response """ url = reverse(EDIT_SE_IRI, args=[self.collection.name, 998]) response = self.client.put( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 998 does not exist') 
@istest def add_archive_to_unknown_deposit(self): """Adding metadata to unknown deposit should return a 404 response """ url = reverse(EM_IRI, args=[self.collection.name, 997]) response = self.client.post( url, content_type='application/zip', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 997 does not exist') @istest def replace_archive_to_unknown_deposit(self): """Replacing archive to unknown deposit should return a 404 response """ url = reverse(EM_IRI, args=[self.collection.name, 996]) response = self.client.put( url, content_type='application/zip', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 996 does not exist') @istest def post_metadata_to_em_iri_failure(self): """Update (POST) archive with wrong content type should return 400 """ deposit_id = self.create_deposit_partial() # only update on partial update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.post( update_uri, content_type='application/x-gtar-compressed', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertRegex(response.content.decode('utf-8'), 'Packaging format supported is restricted to ' 'application/zip, application/x-tar') @istest def put_metadata_to_em_iri_failure(self): """Update (PUT) archive with wrong content type should return 400 """ # given deposit_id = self.create_deposit_partial() # only update on partial # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.put( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertRegex(response.content.decode('utf-8'), 'Packaging format supported is restricted to ' 
'application/zip, application/x-tar') diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index f4d66e6a..76a4f067 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,479 +1,562 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import hashlib import os import shutil +import tarfile import tempfile from django.core.urlresolvers import reverse from django.test import TestCase from io import BytesIO from nose.plugins.attrib import attr from rest_framework import status from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_DEPOSITED) from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT from swh.core import tarball -def create_arborescence_zip(root_path, archive_name, filename, content, - up_to_size=None): +def compute_info(archive_path): + """Given a path, compute information on path. 
+ + """ + with open(archive_path, 'rb') as f: + length = 0 + sha1sum = hashlib.sha1() + md5sum = hashlib.md5() + data = b'' + for chunk in f: + sha1sum.update(chunk) + md5sum.update(chunk) + length += len(chunk) + data += chunk + + return { + 'dir': os.path.dirname(archive_path), + 'name': os.path.basename(archive_path), + 'path': archive_path, + 'length': length, + 'sha1sum': sha1sum.hexdigest(), + 'md5sum': md5sum.hexdigest(), + 'data': data + } + + +def _compress(path, extension, dir_path): + """Compress path according to extension + + """ + if extension == 'zip' or extension == 'tar': + return tarball.compress(path, extension, dir_path) + elif '.' in extension: + split_ext = extension.split('.') + if split_ext[0] != 'tar': + raise ValueError( + 'Development error, only zip or tar archive supported, ' + '%s not supported' % extension) + + # deal with specific tar + mode = split_ext[1] + supported_mode = ['xz', 'gz', 'bz2'] + if mode not in supported_mode: + raise ValueError( + 'Development error, only %s supported, %s not supported' % ( + supported_mode, mode)) + files = tarball._ls(dir_path) + with tarfile.open(path, 'w:%s' % mode) as t: + for fpath, fname in files: + t.add(fpath, arcname=fname, recursive=False) + + return path + + +def create_arborescence_archive(root_path, archive_name, filename, content, + up_to_size=None, extension='zip'): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. 
+ Args: + root_path (str): Location path of the archive to create + archive_name (str): Archive's name (without extension) + filename (str): Archive's content is only one filename + content (bytes): Content of the filename + up_to_size (int | None): Fill in the blanks size to oversize + or complete an archive's size + extension (str): Extension of the archive to write (default is zip) + Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) _length = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += _length while count < up_to_size: f.write(b'0'*batch_size) count += batch_size - zip_path = dir_path + '.zip' - zip_path = tarball.compress(zip_path, 'zip', dir_path) + _path = '%s.%s' % (dir_path, extension) + _path = _compress(_path, extension, dir_path) + return compute_info(_path) - with open(zip_path, 'rb') as f: - length = 0 - sha1sum = hashlib.sha1() - md5sum = hashlib.md5() - data = b'' - for chunk in f: - sha1sum.update(chunk) - md5sum.update(chunk) - length += len(chunk) - data += chunk - return { - 'dir': archive_path_dir, - 'name': archive_name, - 'data': data, - 'path': zip_path, - 'sha1sum': sha1sum.hexdigest(), - 'md5sum': md5sum.hexdigest(), - 'length': length, - } +def create_archive_with_archive(root_path, name, archive): + """Create an archive holding another. 
+ + """ + invalid_archive_path = os.path.join(root_path, name) + with tarfile.open(invalid_archive_path, 'w:gz') as _archive: + _archive.add(archive['path'], arcname=archive['name']) + return compute_info(invalid_archive_path) @attr('fs') class FileSystemCreationRoutine(TestCase): """Mixin intended for tests needed to tamper with archives. """ def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) - self.archive = create_arborescence_zip( + self.archive = create_arborescence_archive( self.root_path, 'archive1', 'file1', b'some content in file') self.atom_entry = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr """ def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) _status = response_content['deposit_status'] if status_partial: expected_status = DEPOSIT_STATUS_PARTIAL else: expected_status = DEPOSIT_STATUS_VERIFIED self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_VERIFIED response = self.client.post( reverse(EM_IRI, 
args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id + def create_deposit_archive_with_archive(self, archive_extension): + # we create the holding archive to a given extension + archive = create_arborescence_archive( + self.root_path, 'archive1', 'file1', b'some content in file', + extension=archive_extension) + + # now we create an archive holding the first created archive + invalid_archive = create_archive_with_archive( + self.root_path, 'invalid.tar.gz', archive) + + # we deposit it + response = self.client.post( + reverse(COL_IRI, args=[self.collection.name]), + content_type='application/x-tar', + data=invalid_archive['data'], + CONTENT_LENGTH=invalid_archive['length'], + HTTP_MD5SUM=invalid_archive['md5sum'], + HTTP_SLUG='external-id', + HTTP_IN_PROGRESS=False, + HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( + invalid_archive['name'], )) + + # then + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + response_content = parse_xml(BytesIO(response.content)) + _status = response_content['deposit_status'] + self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED) + deposit_id = int(response_content['deposit_id']) + return deposit_id + def update_binary_deposit(self, deposit_id, status_partial=False): # update existing deposit with atom entry metadata response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then self.assertEqual(response.status_code, 
status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) _status = response_content['deposit_status'] if status_partial: expected_status = DEPOSIT_STATUS_PARTIAL else: expected_status = DEPOSIT_STATUS_DEPOSITED self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id @attr('fs') class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data deposit_request_types = {} # Add deposit request types for deposit_request_type in ['archive', 'metadata']: drt = DepositRequestType(name=deposit_request_type) drt.save() deposit_request_types[deposit_request_type] = drt _name = 'hal' _provider_url = 'https://hal-test.archives-ouvertes.fr/' _domain = 'archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name, provider_url=_provider_url, domain=_domain) _client.collections = [_collection.id] _client.last_name = _name _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name self.deposit_request_types = deposit_request_types def tearDown(self): super().tearDown() # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): """Mixin intended for testing the api with basic authentication. 
""" def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id https://hal-test.archives-ouvertes.fr/some-external-id """ self.atom_entry_data1 = b""" - anotherthing - https://hal-test.archives-ouvertes.fr/anotherthing + some awesome author """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter """ def create_deposit_with_invalid_archive(self, external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = self.client.post( url, content_type='application/zip', # as zip 
data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_with_status( self, status, external_id='some-external-id-1', swh_id=None, status_detail=None): # create an invalid deposit which we will update further down the line deposit_id = self.create_deposit_with_invalid_archive(external_id) # We cannot create some form of deposit with a given status in # test context ('rejected' for example). Update in place the # deposit with such status to permit some further tests. deposit = Deposit.objects.get(pk=deposit_id) if status == DEPOSIT_STATUS_REJECTED: deposit.status_detail = status_detail deposit.status = status if swh_id: deposit.swh_id = swh_id deposit.save() return deposit_id def create_simple_deposit_partial(self, external_id='some-external-id'): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_partial_with_data_in_args(self, data): """Create a simple deposit (1 request) in `partial` state with the data or metadata as an argument and returns its new identifier. 
Args: data: atom entry Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `deposited` (by default). Returns: deposit id """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `deposited`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id def add_metadata_to_deposit(self, deposit_id, status_partial=False): """Add metadata to deposit. 
""" # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) assert response.status_code == status.HTTP_201_CREATED # then deposit = Deposit.objects.get(pk=deposit_id) assert deposit is not None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert deposit_requests is not [] for dr in deposit_requests: if dr.type.name == 'metadata': assert deposit_requests[0].metadata is not {} return deposit_id diff --git a/version.txt b/version.txt index a0c31f0e..34dba199 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.55-0-g6317a5a \ No newline at end of file +v0.0.56-0-g6367c2e \ No newline at end of file