diff --git a/PKG-INFO b/PKG-INFO index a1cf90f3..c2bef65e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.53 +Version: 0.0.54 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index a1cf90f3..c2bef65e 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.53 +Version: 0.0.54 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index f662065f..d3fa130b 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,153 +1,154 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile Makefile.local README.md requirements-swh.txt requirements.txt setup.py version.txt bin/Makefile bin/content.sh bin/create_deposit.sh bin/create_deposit_atom.sh bin/create_deposit_with_metadata.sh bin/default-setup bin/download-deposit-archive.sh bin/home.sh bin/replace-deposit-archive.sh bin/service-document.sh bin/status.sh bin/swh-deposit bin/update-deposit-with-another-archive.sh bin/update-status.sh debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile docs/blueprint.rst docs/conf.py docs/dev-info.rst docs/getting-started.rst docs/index.rst docs/metadata.rst docs/spec-api.rst docs/spec-loading.rst docs/sys-info.rst docs/_static/.placeholder docs/_templates/.placeholder docs/endpoints/collection.rst docs/endpoints/content.rst docs/endpoints/service-document.rst docs/endpoints/status.rst docs/endpoints/update-media.rst docs/endpoints/update-metadata.rst docs/images/deposit-create-chart.png docs/images/deposit-delete-chart.png docs/images/deposit-update-chart.png resources/deposit/server.yml swh/__init__.py swh/manage.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/create_user.py swh/deposit/errors.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/client/__init__.py swh/deposit/client/cli.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/scheduler.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py +swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_read_archive.py swh/deposit/tests/api/test_deposit_read_metadata.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_status.py swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py \ No newline at end of file diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 9a44ab4c..c0e92c13 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,183 +1,242 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import patoolib from rest_framework import status from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE, METADATA_TYPE from ...models import Deposit, DepositRequest class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): Archive or metadata type Yields: deposit requests of type request_type associated to the deposit """ deposit_requests = DepositRequest.objects.filter( type=self.deposit_request_types[request_type], deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns - True if all archives are ok, False otherwise. + tuple (status, error_detail): True, None if all archives + are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused - return False + return False, { + 'archive': { + 'summary': 'Deposit without archive is rejected.', + 'id': deposit.id, + } + } + rejected_dr_ids = [] for dr in requests: - check = self._check_archive(dr.archive.path) + _path = dr.archive.path + check = self._check_archive(_path) if not check: - return False - return True + rejected_dr_ids.append(dr.id) + + if rejected_dr_ids: + return False, { + 'archive': { + 'summary': 'Following deposit request ids are rejected ' + 'because their associated archive is not ' + 'readable', + 'ids': rejected_dr_ids, + }} + return True, None def _check_archive(self, archive_path): """Check that a given archive is actually ok for reading. Args: archive_path (str): Archive to check Returns: True if archive is successfully read, False otherwise. """ try: patoolib.test_archive(archive_path, verbosity=-1) except Exception: return False else: return True def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: - The deposit to check metadata for. + deposit (Deposit): The deposit instance to extract + metadata from. Returns: - True if the deposit's associated metadata are ok, False otherwise. + metadata dict from the deposit. """ metadata = {} for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): metadata.update(dr.metadata) return metadata def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: - metadata (dict): Metadata to actually check + metadata (dict): Metadata dictionary to check for mandatory fields Returns: - True if metadata is ok, False otherwise. + tuple (status, error_detail): True, None if metadata are + ok (False, ) otherwise. """ - required_fields = (('url',), - ('external_identifier',), - ('name', 'title'), - ('author',)) - - result = all(any(name in field - for field in metadata - for name in possible_names) - for possible_names in required_fields) - return result + required_fields = { + 'url': False, + 'external_identifier': False, + 'author': False, + } + alternate_fields = { + ('name', 'title'): False, # alternate field, at least one + # of them must be present + } + + for field, value in metadata.items(): + for name in required_fields: + if name in field: + required_fields[name] = True + + for possible_names in alternate_fields: + for possible_name in possible_names: + if possible_name in field: + alternate_fields[possible_names] = True + continue + + mandatory_result = [k for k, v in required_fields.items() if not v] + optional_result = [ + k for k, v in alternate_fields.items() if not v] + + if mandatory_result == [] and optional_result == []: + return True, None + detail = [] + if mandatory_result != []: + detail.append({ + 'summary': 'Mandatory fields are missing', + 'fields': mandatory_result + }) + if optional_result != []: + detail.append({ + 'summary': 'Mandatory alternate fields are missing', + 'fields': optional_result, + }) + return False, { + 'metadata': detail + } def _check_url(self, client_domain, metadata): """Check compatibility between client_domain and url field in metadata Args: client_domain (str): url associated with the deposit's client metadata (dict): Metadata where to find url + Returns: - True if url is ok, False otherwise. + tuple (status, error_detail): True, None if url associated + with the deposit's client is ok, (False, + ) otherwise. """ - metadata_urls = [] + url_fields = [] for field in metadata: - if 'url' in field: - metadata_urls.append(metadata[field]) - - return any(client_domain in url - for url in metadata_urls) + url_fields.append(field) + if 'url' in field and client_domain in metadata[field]: + return True, None + + return False, { + 'url': { + 'summary': "At least one url field must be compatible with the" + "client's domain name. The following url fields " + "failed the check.", + 'fields': url_fields, + }} def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) client_domain = deposit.client.domain metadata = self._metadata_get(deposit) - problems = [] + problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors - archives_status = self._check_deposit_archives(deposit) + archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: - problems.append('archive(s)') + problems.update(error_detail) - metadata_status = self._check_metadata(metadata) + metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: - problems.append('metadata') + problems.update(error_detail) - url_status = self._check_url(client_domain, metadata) + url_status, error_detail = self._check_url(client_domain, metadata) if not url_status: - problems.append('url') + problems.update(error_detail) deposit_status = archives_status and metadata_status and url_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED + deposit.status_detail = problems response = { 'status': deposit.status, - 'details': 'Some %s failed the checks.' % ( - ' and '.join(problems), ), + 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/migrations/0012_deposit_status_detail.py b/swh/deposit/migrations/0012_deposit_status_detail.py new file mode 100644 index 00000000..b6825072 --- /dev/null +++ b/swh/deposit/migrations/0012_deposit_status_detail.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.14 on 2018-07-09 13:08 +from __future__ import unicode_literals + +import django.contrib.postgres.fields.jsonb +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('deposit', '0011_auto_20180115_1510'), + ] + + operations = [ + migrations.AddField( + model_name='deposit', + name='status_detail', + field=django.contrib.postgres.fields.jsonb.JSONField(null=True), + ), + ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py index 68fc5677..5d9bbadb 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -1,210 +1,215 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated from: # cd swh_deposit && \ # python3 -m manage inspectdb from django.contrib.postgres.fields import JSONField, ArrayField from django.contrib.auth.models import User, UserManager from django.db import models from django.utils.timezone import now from .config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED from .config import DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS -from .config import DEPOSIT_STATUS_LOAD_FAILURE +from .config import DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_REJECTED class Dbversion(models.Model): """Db version """ version = models.IntegerField(primary_key=True) release = models.DateTimeField(default=now, null=True) description = models.TextField(blank=True, null=True) class Meta: db_table = 'dbversion' def __str__(self): return str({ 'version': self.version, 'release': self.release, 'description': self.description }) """Possible status""" DEPOSIT_STATUS = [ (DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_PARTIAL), ('expired', 'expired'), (DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED), (DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED), ('rejected', 'rejected'), ('loading', 'loading'), (DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS), (DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE), ] """Possible status and the detailed meaning.""" DEPOSIT_STATUS_DETAIL = { DEPOSIT_STATUS_PARTIAL: 'Deposit is partially received. To finalize it, ' 'In-Progress header should be false', 'expired': 'Deposit has been there too long and is now ' 'deemed ready to be garbage collected', DEPOSIT_STATUS_DEPOSITED: 'Deposit is ready for additional checks ' '(tarball ok, metadata, etc...)', DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and ' 'ready for loading', 'rejected': 'Deposit failed the checks', 'loading': "Loading is ongoing on swh's side", DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully ' 'loaded into the Software Heritage archive', DEPOSIT_STATUS_LOAD_FAILURE: 'The deposit loading into the ' 'Software Heritage archive failed', } class DepositClient(User): """Deposit client """ collections = ArrayField(models.IntegerField(), null=True) objects = UserManager() provider_url = models.TextField(null=False) domain = models.TextField(null=False) class Meta: db_table = 'deposit_client' def __str__(self): return str({ 'id': self.id, 'collections': self.collections, 'username': super().username, 'domain': self.domain, 'provider_url': self.provider_url, }) class Deposit(models.Model): """Deposit reception table """ id = models.BigAutoField(primary_key=True) # First deposit reception date reception_date = models.DateTimeField(auto_now_add=True) # Date when the deposit is deemed complete and ready for loading complete_date = models.DateTimeField(null=True) # collection concerned by the deposit collection = models.ForeignKey( 'DepositCollection', models.DO_NOTHING) # Deposit's external identifier external_id = models.TextField() # Deposit client client = models.ForeignKey('DepositClient', models.DO_NOTHING) # SWH's loading result identifier swh_id = models.TextField(blank=True, null=True) # Deposit's status regarding loading status = models.TextField( choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) + status_detail = JSONField(null=True) # deposit can have one parent parent = models.ForeignKey('self', null=True) class Meta: db_table = 'deposit' def __str__(self): - return str({ + d = { 'id': self.id, 'reception_date': self.reception_date, 'collection': self.collection.name, 'external_id': self.external_id, 'client': self.client.username, - 'status': self.status - }) + 'status': self.status, + } + + if self.status in (DEPOSIT_STATUS_REJECTED): + d['status_detail'] = self.status_detail + return str(d) class DepositRequestType(models.Model): """Deposit request type made by clients (either archive or metadata) """ id = models.BigAutoField(primary_key=True) name = models.TextField() class Meta: db_table = 'deposit_request_type' def __str__(self): return str({'id': self.id, 'name': self.name}) def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ Args: instance (DepositRequest): DepositRequest concerned by the upload filename (str): Filename of the uploaded file Returns: A path to be prefixed by the MEDIA_ROOT to access physically to the file uploaded. """ return 'client_{0}/{1}'.format(instance.deposit.client.id, filename) class DepositRequest(models.Model): """Deposit request associated to one deposit. """ id = models.BigAutoField(primary_key=True) # Deposit concerned by the request deposit = models.ForeignKey(Deposit, models.DO_NOTHING) date = models.DateTimeField(auto_now_add=True) # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) type = models.ForeignKey( 'DepositRequestType', models.DO_NOTHING) class Meta: db_table = 'deposit_request' def __str__(self): meta = None if self.metadata: from json import dumps meta = dumps(self.metadata) archive_name = None if self.archive: archive_name = self.archive.name return str({ 'id': self.id, 'deposit': self.deposit, 'metadata': meta, 'archive': archive_name }) class DepositCollection(models.Model): id = models.BigAutoField(primary_key=True) # Human readable name for the collection type e.g HAL, arXiv, etc... name = models.TextField() class Meta: db_table = 'deposit_collection' def __str__(self): return str({'id': self.id, 'name': self.name}) diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py index dfda6759..f1142b17 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -1,147 +1,203 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import unittest from django.core.urlresolvers import reverse from nose.tools import istest from nose.plugins.attrib import attr from rest_framework import status from rest_framework.test import APITestCase from ...models import Deposit from ...config import DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT from ...config import DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine from ...api.private.deposit_check import SWHChecksDeposit @attr('fs') class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. """ def setUp(self): super().setUp() @istest def deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) @istest def deposit_ko(self): """Invalid deposit should fail the checks (-> status rejected) """ - deposit_id = self.create_invalid_deposit() + deposit_id = self.create_deposit_with_invalid_archive() deposit = Deposit.objects.get(pk=deposit_id) - self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) + self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) - self.assertEqual(data['details'], - 'Some archive(s) and metadata and url ' + - 'failed the checks.') + expected_error = { + 'metadata': [ + { + 'fields': ['url', 'external_identifier', 'author'], + 'summary': 'Mandatory fields are missing' + }, + { + 'fields': [['name', 'title']], + 'summary': 'Mandatory alternate fields are missing' + }], + } + details = data['details'] + # archive checks failure + self.assertEqual(len(details['archive']['ids']), 1) + self.assertEqual(details['archive']['summary'], + 'Following deposit request ids are ' + 'rejected because their associated archive' + ' is not readable') + # metadata check failure + self.assertEqual(len(details['metadata']), 2) + mandatory = details['metadata'][0] + self.assertEqual(mandatory['summary'], 'Mandatory fields are missing') + self.assertEqual(set(mandatory['fields']), + set(['url', 'external_identifier', 'author'])) + alternate = details['metadata'][1] + self.assertEqual(alternate['summary'], + 'Mandatory alternate fields are missing') + self.assertEqual(alternate['fields'], [['name', 'title']]) + # url check failure + self.assertEqual(details['url']['summary'], + "At least one url field must be compatible with the" + "client's domain name. The following url fields " + "failed the check.") + self.assertEqual(details['url']['fields'], []) + deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) @istest def check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEquals(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) class CheckMetadata(unittest.TestCase, SWHChecksDeposit): @istest def check_metadata_ok(self): - actual_check = self._check_metadata({ + actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', 'author': 'someone', }) self.assertTrue(actual_check) + self.assertIsNone(detail) @istest def check_metadata_ok2(self): - actual_check = self._check_metadata({ + actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', 'author': 'someone', }) self.assertTrue(actual_check) + self.assertIsNone(detail) @istest def check_metadata_ko(self): - actual_check = self._check_metadata({ + """Missing optional field should be caught + + """ + actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) + expected_error = { + 'metadata': [{ + 'summary': 'Mandatory alternate fields are missing', + 'fields': [('name', 'title')], + }] + } self.assertFalse(actual_check) + self.assertEqual(error_detail, expected_error) @istest def check_metadata_ko2(self): - actual_check = self._check_metadata({ + """Missing mandatory fields should be caught + + """ + actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) + expected_error = { + 'metadata': [{ + 'summary': 'Mandatory fields are missing', + 'fields': ['author'], + }] + } + self.assertFalse(actual_check) + self.assertEqual(error_detail, expected_error) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index c08324e3..0bfab3eb 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,460 +1,475 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import hashlib import os import shutil import tempfile from django.core.urlresolvers import reverse from django.test import TestCase from io import BytesIO from nose.plugins.attrib import attr from rest_framework import status -from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI +from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI, + DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_VERIFIED, + DEPOSIT_STATUS_DEPOSITED) from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT from swh.core import tarball def create_arborescence_zip(root_path, archive_name, filename, content, up_to_size=None): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) _length = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += _length while count < up_to_size: f.write(b'0'*batch_size) count += batch_size zip_path = dir_path + '.zip' zip_path = tarball.compress(zip_path, 'zip', dir_path) with open(zip_path, 'rb') as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() data = b'' for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { 'dir': archive_path_dir, 'name': archive_name, 'data': data, 'path': zip_path, 'sha1sum': sha1sum.hexdigest(), 'md5sum': md5sum.hexdigest(), 'length': length, } @attr('fs') class FileSystemCreationRoutine(TestCase): """Mixin intended for tests needed to tamper with archives. """ def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) self.archive = create_arborescence_zip( self.root_path, 'archive1', 'file1', b'some content in file') self.atom_entry = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr """ def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then - assert response.status_code == status.HTTP_201_CREATED + self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) + _status = response_content['deposit_status'] + if status_partial: + expected_status = DEPOSIT_STATUS_PARTIAL + else: + expected_status = DEPOSIT_STATUS_VERIFIED + self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_VERIFIED response = self.client.post( reverse(EM_IRI, args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def update_binary_deposit(self, deposit_id, status_partial=False): # update existing deposit with atom entry metadata response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then - # assert response.status_code == status.HTTP_201_CREATED + self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) + _status = response_content['deposit_status'] + if status_partial: + expected_status = DEPOSIT_STATUS_PARTIAL + else: + expected_status = DEPOSIT_STATUS_DEPOSITED + self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id @attr('fs') class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data deposit_request_types = {} # Add deposit request types for deposit_request_type in ['archive', 'metadata']: drt = DepositRequestType(name=deposit_request_type) drt.save() deposit_request_types[deposit_request_type] = drt _name = 'hal' _provider_url = 'https://hal-test.archives-ouvertes.fr/' _domain = 'archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name, provider_url=_provider_url, domain=_domain) _client.collections = [_collection.id] _client.last_name = _name _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name self.deposit_request_types = deposit_request_types def tearDown(self): super().tearDown() # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): """Mixin intended for testing the api with basic authentication. """ def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id https://hal-test.archives-ouvertes.fr/some-external-id """ self.atom_entry_data1 = b""" anotherthing https://hal-test.archives-ouvertes.fr/anotherthing """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter """ - def create_invalid_deposit(self, external_id='some-external-id-1'): + def create_deposit_with_invalid_archive(self, + external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_with_status( self, status, external_id='some-external-id-1', swh_id=None): - deposit_id = self.create_invalid_deposit(external_id) + # create an invalid deposit which we will update further down the line + deposit_id = self.create_deposit_with_invalid_archive(external_id) # We cannot create some form of deposit with a given status in - # test context ('rejected' for example). As flipped off the - # checks in the configuration so all deposits have the status - # deposited). Update in place the deposit with such - # status + # test context ('rejected' for example). Update in place the + # deposit with such status to permit some further tests. deposit = Deposit.objects.get(pk=deposit_id) deposit.status = status if swh_id: deposit.swh_id = swh_id deposit.save() return deposit_id def create_simple_deposit_partial(self, external_id='some-external-id'): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_partial_with_data_in_args(self, data): """Create a simple deposit (1 request) in `partial` state with the data or metadata as an argument and returns its new identifier. Args: data: atom entry Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `deposited` (by default). Returns: deposit id """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `deposited`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id def add_metadata_to_deposit(self, deposit_id, status_partial=False): """Add metadata to deposit. """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) assert response.status_code == status.HTTP_201_CREATED # then deposit = Deposit.objects.get(pk=deposit_id) assert deposit is not None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert deposit_requests is not [] for dr in deposit_requests: if dr.type.name == 'metadata': assert deposit_requests[0].metadata is not {} return deposit_id diff --git a/swh/deposit/tests/loader/test_checker.py b/swh/deposit/tests/loader/test_checker.py index cb0d111a..b48afc92 100644 --- a/swh/deposit/tests/loader/test_checker.py +++ b/swh/deposit/tests/loader/test_checker.py @@ -1,72 +1,71 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from nose.tools import istest from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_VERIFIED from swh.deposit.config import DEPOSIT_STATUS_REJECTED from swh.deposit.loader.checker import DepositChecker from django.core.urlresolvers import reverse from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine class DepositCheckerScenarioTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() # 2. Sets a basic client which accesses the test data checker_client = SWHDepositTestClient(client=self.client, config=CLIENT_TEST_CONFIG) # 3. setup loader with no persistence and that client self.checker = DepositChecker(client=checker_client) @istest def check_deposit_ready(self): - """Check a valid deposit deposited should result in ready state + """Check on a valid 'deposited' deposit should result in 'verified' """ # 1. create a deposit with archive and metadata deposit_id = self.create_simple_binary_deposit() deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) # when actual_result = self.checker.check(deposit_check_url=deposit_check_url) - # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) self.assertEquals(actual_result, {'status': 'eventful'}) @istest def check_deposit_rejected(self): - """Check an invalid deposit deposited should result in rejected + """Check on invalid 'deposited' deposit should result in 'rejected' """ # 1. create a deposit with archive and metadata - deposit_id = self.create_invalid_deposit() + deposit_id = self.create_deposit_with_invalid_archive() args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) # when actual_result = self.checker.check(deposit_check_url=deposit_check_url) # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) self.assertEquals(actual_result, {'status': 'eventful'}) diff --git a/version.txt b/version.txt index 8e024313..9c7699eb 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.53-0-gaeaab02 \ No newline at end of file +v0.0.54-0-g95a7aaa \ No newline at end of file