diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
index 21eae3d0..e986c329 100644
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -1,104 +1,121 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import zipfile
from rest_framework import status
from ..common import SWHGetDepositAPI, SWHPrivateAPIView
from ...config import DEPOSIT_STATUS_READY, DEPOSIT_STATUS_REJECTED
-from ...config import ARCHIVE_TYPE
+from ...config import ARCHIVE_TYPE, METADATA_TYPE
from ...models import Deposit, DepositRequest
class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView):
"""Dedicated class to check a deposit's archives and metadata.
Only GET is supported.
"""
+
def deposit_requests(self, deposit):
    """Iterate over the deposit requests attached to a deposit.

    Args:
        deposit (Deposit): Deposit whose requests are wanted

    Yields:
        each deposit request of that deposit, ordered by id

    """
    ordered_requests = DepositRequest.objects.filter(
        deposit=deposit).order_by('id')
    yield from ordered_requests
def _check_archive(self, archive):
"""Check that a given archive is actually ok for reading.
Args:
archive (File): Archive to check
Returns:
True if archive is successfully read, False otherwise.
"""
try:
zf = zipfile.ZipFile(archive.path)
zf.infolist()
except Exception as e:
return False
else:
return True
def _check_metadata(self, metadata):
"""Check to execute on metadata.
Args:
metadata (): Metadata to actually check
Returns:
True if metadata is ok, False otherwise.
"""
- # FIXME: Define checks to implement
+ must_meta = ['url', 'external_identifier', ['name', 'title'], 'author']
+ # checks only for must metadata on all metadata requests
+ for mm in must_meta:
+ found = False
+ for k in metadata:
+ if isinstance(mm, list):
+ for p in mm:
+ if p in k:
+ found = True
+ break
+ elif mm in k:
+ found = True
+ break
+ if not found:
+ return False
return True
def process_get(self, req, collection_name, deposit_id):
    """Check a deposit's archives and metadata and update its status.

    Every archive request of the deposit must be readable and the
    metadata aggregated over all its metadata requests must pass the
    metadata check. The deposit is marked as ready when both hold,
    rejected otherwise, and saved.

    Args:
        req (Request): Incoming request (not read here)
        collection_name (str): Collection owning the deposit (not read
            here)
        deposit_id (id): Deposit concerned by the checks

    Returns:
        Tuple status, json body holding the new deposit status,
        content-type

    """
    deposit = Deposit.objects.get(pk=deposit_id)

    all_metadata = {}
    archives_ok = True
    for dr in self.deposit_requests(deposit):
        request_type = dr.type.name
        if request_type == ARCHIVE_TYPE:
            if not self._check_archive(dr.archive):
                # one unreadable archive is enough to reject
                archives_ok = False
                break
        elif request_type == METADATA_TYPE:
            # aggregating all metadata requests for check on complete set
            all_metadata.update(dr.metadata)

    # the archive verdict is kept separate so the metadata check cannot
    # overwrite an archive failure
    deposit_status = archives_ok and self._check_metadata(all_metadata)

    # if problem in any deposit requests, the deposit is rejected
    if deposit_status:
        deposit.status = DEPOSIT_STATUS_READY
    else:
        deposit.status = DEPOSIT_STATUS_REJECTED
    deposit.save()

    return (status.HTTP_200_OK,
            json.dumps({
                'status': deposit.status
            }),
            'application/json')
diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py
index e03a4181..851e1317 100644
--- a/swh/deposit/tests/api/test_deposit_check.py
+++ b/swh/deposit/tests/api/test_deposit_check.py
@@ -1,71 +1,98 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
from django.core.urlresolvers import reverse
from nose.tools import istest
from nose.plugins.attrib import attr
from rest_framework import status
from rest_framework.test import APITestCase
from ...models import Deposit
from ...config import DEPOSIT_STATUS_READY, PRIVATE_CHECK_DEPOSIT
from ...config import DEPOSIT_STATUS_READY_FOR_CHECKS, DEPOSIT_STATUS_REJECTED
from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine
from ..common import FileSystemCreationRoutine
@attr('fs')
class CheckDepositTest(APITestCase, WithAuthTestCase,
                       BasicTestCase, CommonCreationRoutine,
                       FileSystemCreationRoutine):
    """Check deposit endpoints.

    """
    def _check_deposit_ends_in(self, deposit_id, expected_status):
        """Run the private check endpoint on a deposit and verify the result.

        The deposit must be in status ready-for-checks beforehand. The
        endpoint must answer 200 with `expected_status` in its json body
        and the stored deposit must end up in that same status.

        Args:
            deposit_id: Identifier of the deposit to check
            expected_status (str): Status expected once checks are done

        """
        deposit = Deposit.objects.get(pk=deposit_id)
        self.assertEqual(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS)

        url = reverse(PRIVATE_CHECK_DEPOSIT,
                      args=[self.collection.name, deposit.id])
        response = self.client.get(url)

        self.assertEqual(response.status_code, status.HTTP_200_OK)
        data = json.loads(response.content.decode('utf-8'))
        self.assertEqual(data['status'], expected_status)
        deposit = Deposit.objects.get(pk=deposit.id)
        self.assertEqual(deposit.status, expected_status)

    @istest
    def deposit_ok(self):
        """Proper deposit should succeed the checks (-> status ready)

        """
        deposit_id = self.create_simple_binary_deposit(status_partial=True)
        deposit_id = self.update_binary_deposit(deposit_id,
                                                status_partial=False)

        self._check_deposit_ends_in(deposit_id, DEPOSIT_STATUS_READY)

    @istest
    def deposit_ko(self):
        """Invalid deposit should fail the checks (-> status rejected)

        """
        deposit_id = self.create_invalid_deposit()

        self._check_deposit_ends_in(deposit_id, DEPOSIT_STATUS_REJECTED)

    @istest
    def check_deposit_metadata_ok(self):
        """Proper deposit should succeed the checks (-> status ready)
           with all **MUST** metadata

           using the codemeta metadata test set
        """
        deposit_id = self.create_simple_binary_deposit(status_partial=True)
        deposit_id_metadata = self.add_metadata_to_deposit(deposit_id)
        self.assertEqual(deposit_id, deposit_id_metadata)

        self._check_deposit_ends_in(deposit_id, DEPOSIT_STATUS_READY)
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
index eecfb30b..c98f2fca 100644
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -1,426 +1,457 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import base64
import hashlib
import os
import shutil
import tempfile
from django.core.urlresolvers import reverse
from django.test import TestCase
from io import BytesIO
from nose.plugins.attrib import attr
from rest_framework import status
from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI
from swh.deposit.config import DEPOSIT_STATUS_REJECTED
from swh.deposit.models import DepositClient, DepositCollection, Deposit
from swh.deposit.models import DepositRequest
from swh.deposit.models import DepositRequestType
from swh.deposit.parsers import parse_xml
from swh.deposit.settings.testing import MEDIA_ROOT
from swh.loader.tar import tarball
def create_arborescence_zip(root_path, archive_name, filename, content,
                            up_to_size=None):
    """Build an archive named archive_name in the root_path.
    This archive contains one file named filename with the content content.

    Args:
        root_path (str): Directory under which a fresh temporary
            directory is created to hold the archive
        archive_name (str): Name of the directory to compress
        filename (str): Name of the single file inside that directory
        content (bytes): Content written at the start of that file
        up_to_size (int): When set, the file is padded with b'0' in
            batches of 128 bytes until at least that many bytes are
            written (the last batch may go past that size)

    Returns:
        dict with the keys:
        - dir: the directory of that archive
        - name: the archive's name
        - data: the archive's raw bytes
        - path: full path to the archive
        - sha1sum: archive's sha1sum
        - md5sum: archive's md5sum
        - length: archive's length

    """
    os.makedirs(root_path, exist_ok=True)
    archive_path_dir = tempfile.mkdtemp(dir=root_path)

    dir_path = os.path.join(archive_path_dir, archive_name)
    os.mkdir(dir_path)

    filepath = os.path.join(dir_path, filename)
    batch_size = 128
    with open(filepath, 'wb') as f:
        f.write(content)
        if up_to_size:  # fill with blank content up to a given size
            written = len(content)
            padding = b'0' * batch_size
            while written < up_to_size:
                f.write(padding)
                written += batch_size

    zip_path = dir_path + '.zip'
    zip_path = tarball.compress(zip_path, 'zip', dir_path)

    # the whole archive is returned anyway, so read it in one go and
    # derive checksums and length from those bytes
    with open(zip_path, 'rb') as f:
        data = f.read()

    return {
        'dir': archive_path_dir,
        'name': archive_name,
        'data': data,
        'path': zip_path,
        'sha1sum': hashlib.sha1(data).hexdigest(),
        'md5sum': hashlib.md5(data).hexdigest(),
        'length': len(data),
    }
@attr('fs')
class FileSystemCreationRoutine(TestCase):
"""Mixin intended for tests needed to tamper with archives.
"""
def setUp(self):
"""Define the test client and other test variables."""
super().setUp()
self.root_path = '/tmp/swh-deposit/test/build-zip/'
os.makedirs(self.root_path, exist_ok=True)
self.archive = create_arborescence_zip(
self.root_path, 'archive1', 'file1', b'some content in file')
+ self.atom_entry = b"""
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 1785io25c695
+ 2017-10-07T15:17:08Z
+ some awesome author
+ http://test.test.fr
+ """
+
def tearDown(self):
    """Remove the directory holding the archives built for the test."""
    super().tearDown()
    archives_root = self.root_path
    shutil.rmtree(archives_root)
def create_simple_binary_deposit(self, status_partial=True):
    """Post the test archive on the collection to create a deposit.

    Args:
        status_partial (bool): Value sent as the In-Progress header

    Returns:
        Identifier of the created deposit, as read in the response

    """
    archive = self.archive
    collection_url = reverse(COL_IRI, args=[self.collection.name])
    content_disposition = 'attachment; filename=%s' % (archive['name'], )

    response = self.client.post(
        collection_url,
        content_type='application/zip',
        data=archive['data'],
        CONTENT_LENGTH=archive['length'],
        HTTP_MD5SUM=archive['md5sum'],
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial,
        HTTP_CONTENT_DISPOSITION=content_disposition)

    # the creation must have been accepted
    assert response.status_code == status.HTTP_201_CREATED
    parsed_response = parse_xml(BytesIO(response.content))
    return parsed_response['{http://www.w3.org/2005/Atom}deposit_id']
def create_complex_binary_deposit(self, status_partial=False):
    """Create a deposit holding two archives.

    A first archive is posted as a partial deposit, then a second one
    (`self.archive2`) is posted on that deposit.

    NOTE(review): `self.archive2` is not created by the setUp visible
    next to this method -- confirm where it is defined.

    Args:
        status_partial (bool): Value sent as the In-Progress header of
            the second post

    Returns:
        Identifier of the deposit, as read in the second response

    """
    first_deposit_id = self.create_simple_binary_deposit(
        status_partial=True)

    # second archive, posted on the deposit just created
    second_archive = self.archive2
    media_url = reverse(EM_IRI,
                        args=[self.collection.name, first_deposit_id])
    response = self.client.post(
        media_url,
        content_type='application/zip',
        data=second_archive['data'],
        CONTENT_LENGTH=second_archive['length'],
        HTTP_MD5SUM=second_archive['md5sum'],
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial,
        HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip')

    assert response.status_code == status.HTTP_201_CREATED
    parsed_response = parse_xml(BytesIO(response.content))
    return parsed_response['{http://www.w3.org/2005/Atom}deposit_id']
def update_binary_deposit(self, deposit_id, status_partial=False):
    """Update an existing deposit with atom entry metadata.

    The metadata posted is `self.codemeta_entry_data1`, which is set up
    by CommonCreationRoutine: the test case must mix that class in too.

    Args:
        deposit_id: Identifier of the deposit to update
        status_partial (bool): Value sent as the In-Progress header

    Returns:
        Identifier of the deposit, as read in the response

    """
    response = self.client.post(
        reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
        content_type='application/atom+xml;type=entry',
        data=self.codemeta_entry_data1,
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial)

    # fail here with a clear status mismatch rather than later with a
    # KeyError on a missing deposit_id in an error response
    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    deposit_id = response_content[
        '{http://www.w3.org/2005/Atom}deposit_id']
    return deposit_id
+
@attr('fs')
class BasicTestCase(TestCase):
    """Mixin intended for data setup purposes (user, collection, etc...)

    """
    def setUp(self):
        """Create the deposit request types, one collection and one client.

        """
        super().setUp()
        # do not truncate diffs in assertion failures
        self.maxDiff = None

        # deposit request types, indexed by their name
        request_types = {}
        for type_name in ['archive', 'metadata']:
            request_type = DepositRequestType(name=type_name)
            request_type.save()
            request_types[type_name] = request_type

        name = 'hal'

        # collection
        collection = DepositCollection(name=name)
        collection.save()

        # user/client, its name also being its password
        client = DepositClient.objects.create_user(
            username=name,
            password=name,
            url='https://hal.test.fr/')
        client.collections = [collection.id]
        client.save()

        self.collection = collection
        self.user = client
        self.username = name
        self.userpass = name
        self.deposit_request_types = request_types

    def tearDown(self):
        """Clean up the files uploaded in the tests' own media root folder.

        """
        super().tearDown()
        if not os.path.exists(MEDIA_ROOT):
            return
        for entry in os.listdir(MEDIA_ROOT):
            shutil.rmtree(os.path.join(MEDIA_ROOT, entry))
class WithAuthTestCase(TestCase):
    """Mixin intended for testing the api with basic authentication.

    """
    def setUp(self):
        """Set the basic authorization header on the test client.

        Reads `self.username` and `self.userpass`.

        """
        super().setUp()
        raw_credentials = '%s:%s' % (self.username, self.userpass)
        encoded = base64.b64encode(raw_credentials.encode('utf-8'))
        self.client.credentials(
            HTTP_AUTHORIZATION='Basic %s' % encoded.decode('utf-8'))

    def tearDown(self):
        """Reset the test client's credentials."""
        super().tearDown()
        self.client.credentials()
class CommonCreationRoutine(TestCase):
"""Mixin class to share initialization routine.
cf:
`class`:test_deposit_update.DepositReplaceExistingDataTest
`class`:test_deposit_update.DepositUpdateDepositWithNewDataTest
`class`:test_deposit_update.DepositUpdateFailuresTest
`class`:test_deposit_delete.DepositDeleteTest
"""
def setUp(self):
super().setUp()
self.atom_entry_data0 = b"""
-
- some-external-id
-"""
+
+ some-external-id
+ """
self.atom_entry_data1 = b"""
-
- anotherthing
-"""
+
+ anotherthing
+ """
self.atom_entry_data2 = b"""
Awesome Compiler
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
1785io25c695
2017-10-07T15:17:08Z
some awesome author
+ http://test.test.fr
"""
self.codemeta_entry_data0 = b"""
Awesome Compiler
+ http://test.test.fr
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
1785io25c695
2017-10-07T15:17:08Z
some awesome author
description
key-word 1
"""
self.codemeta_entry_data1 = b"""
Composing a Web of Audio Applications
hal
hal-01243065
hal-01243065
https://hal-test.archives-ouvertes.fr/hal-01243065
test
DSP programming,Web
2017-05-03T16:08:47+02:00
this is the description
1
phpstorm
stable
php
python
C
GNU General Public License v3.0 only
CeCILL Free Software License Agreement v1.1
HAL
hal@ccsd.cnrs.fr
+
+ Morane Gruenpeter
+
"""
def create_invalid_deposit(self):
    """Create a deposit whose archive is not an actual zip file.

    The payload is declared as a zip but holds plain bytes.

    Returns:
        Identifier of the created deposit, as read in the response

    """
    payload = b'some data which is clearly not a zip file'
    headers = {
        'CONTENT_LENGTH': len(payload),
        # other headers needs HTTP_ prefix to be taken into account
        'HTTP_SLUG': 'some-external-id-1',
        'HTTP_CONTENT_MD5': hashlib.md5(payload).hexdigest(),
        'HTTP_PACKAGING': 'http://purl.org/net/sword/package/SimpleZip',
        'HTTP_CONTENT_DISPOSITION': 'attachment; filename=filename0',
    }

    response = self.client.post(
        reverse(COL_IRI, args=[self.collection.name]),
        content_type='application/zip',  # as zip
        data=payload,
        **headers)

    parsed_response = parse_xml(BytesIO(response.content))
    return parsed_response['{http://www.w3.org/2005/Atom}deposit_id']
def create_deposit_with_status_rejected(self):
    """Create a deposit and force its status to rejected.

    Returns:
        Identifier of the rejected deposit

    """
    rejected_id = self.create_invalid_deposit()

    # We cannot create rejected deposit in test context (we
    # flipped off the checks in the configuration so all deposits
    # have the status ready-for-checks). Update in place the
    # deposit with such status
    rejected = Deposit.objects.get(pk=rejected_id)
    rejected.status = DEPOSIT_STATUS_REJECTED
    rejected.save()
    return rejected_id
def create_simple_deposit_partial(self):
    """Create a simple deposit (1 request) in `partial` state and returns
    its new identifier.

    The request posted holds `self.atom_entry_data0`.

    Returns:
        deposit id

    """
    # same request as the generic routine, only the atom entry is fixed
    return self.create_deposit_partial_with_data_in_args(
        self.atom_entry_data0)
def create_deposit_partial_with_data_in_args(self, data):
    """Create a simple deposit (1 request) in `partial` state out of the
    atom entry given, and returns its new identifier.

    Args:
        data: atom entry to post

    Returns:
        deposit id

    """
    collection_url = reverse(COL_IRI, args=[self.collection.name])
    response = self.client.post(
        collection_url,
        content_type='application/atom+xml;type=entry',
        data=data,
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS='true')

    assert response.status_code == status.HTTP_201_CREATED
    parsed_response = parse_xml(BytesIO(response.content))
    return parsed_response['{http://www.w3.org/2005/Atom}deposit_id']
def _update_deposit_with_status(self, deposit_id, status_partial=False):
    """Post the atom entry `self.atom_entry_data1` on a given deposit.

    Args:
        deposit_id: Identifier of the deposit to update
        status_partial (bool): Value sent as the In-Progress header
            (False by default, that is no longer in progress)

    Returns:
        deposit id

    """
    edit_url = reverse(EDIT_SE_IRI,
                       args=[self.collection.name, deposit_id])
    response = self.client.post(
        edit_url,
        content_type='application/atom+xml;type=entry',
        data=self.atom_entry_data1,
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial)

    # the update must have been accepted
    assert response.status_code == status.HTTP_201_CREATED
    return deposit_id
def create_deposit_ready(self):
    """Create a deposit made of 2 requests, the second one being posted
    as no longer in progress.

    Returns:
        deposit id

    """
    partial_id = self.create_simple_deposit_partial()
    return self._update_deposit_with_status(partial_id)
def create_deposit_partial(self):
    """Create a deposit made of 2 requests, both posted as still in
    progress.

    Returns:
        deposit id

    """
    partial_id = self.create_simple_deposit_partial()
    return self._update_deposit_with_status(partial_id,
                                            status_partial=True)
def add_metadata_to_deposit(self, deposit_id, status_partial=False):
    """Add metadata to deposit.

    The metadata posted is `self.codemeta_entry_data1`. Once posted,
    the deposit must hold requests and each of its metadata requests
    must hold actual metadata.

    Args:
        deposit_id: Identifier of the deposit to update
        status_partial (bool): Value sent as the In-Progress header

    Returns:
        deposit id

    """
    # when
    response = self.client.post(
        reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
        content_type='application/atom+xml;type=entry',
        data=self.codemeta_entry_data1,
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial)

    assert response.status_code == status.HTTP_201_CREATED

    # then
    # (get() raises when the deposit does not exist)
    deposit = Deposit.objects.get(pk=deposit_id)

    # compare by value: identity checks against a fresh [] or {} literal
    # can never fail
    deposit_requests = list(DepositRequest.objects.filter(deposit=deposit))
    assert deposit_requests

    for dr in deposit_requests:
        if dr.type.name == 'metadata':
            assert dr.metadata
    return deposit_id
diff --git a/swh/deposit/tests/loader/test_checker.py b/swh/deposit/tests/loader/test_checker.py
index 740089b8..55d8cd9a 100644
--- a/swh/deposit/tests/loader/test_checker.py
+++ b/swh/deposit/tests/loader/test_checker.py
@@ -1,70 +1,72 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from nose.tools import istest
from rest_framework.test import APITestCase
from swh.deposit.models import Deposit
from swh.deposit.config import PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_READY
from swh.deposit.config import DEPOSIT_STATUS_REJECTED
from swh.deposit.loader.checker import DepositChecker
from django.core.urlresolvers import reverse
from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG
from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine
from ..common import FileSystemCreationRoutine
class DepositCheckerScenarioTest(APITestCase, WithAuthTestCase,
                                 BasicTestCase, CommonCreationRoutine,
                                 FileSystemCreationRoutine):
    """Scenarios running the deposit checker against the test api.

    """
    def setUp(self):
        super().setUp()

        # 2. Sets a basic client which accesses the test data
        checker_client = SWHDepositTestClient(client=self.client,
                                              config=CLIENT_TEST_CONFIG)
        # 3. setup the checker with that client
        self.checker = DepositChecker(client=checker_client)

    def _check_deposit_ends_in(self, deposit_id, expected_status):
        """Run the checker on a deposit and verify the resulting status.

        Both the status returned by the checker and the status stored
        on the deposit must equal `expected_status`.

        Args:
            deposit_id: Identifier of the deposit to check
            expected_status (str): Status expected once checks are done

        """
        args = [self.collection.name, deposit_id]
        deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)

        # when
        actual_status = self.checker.check(
            deposit_check_url=deposit_check_url)

        # then
        deposit = Deposit.objects.get(pk=deposit_id)
        self.assertEqual(deposit.status, expected_status)
        self.assertEqual(actual_status, expected_status)

    @istest
    def check_deposit_ready(self):
        """Check a valid deposit ready-for-checks should result in ready state

        """
        # 1. create a deposit with archive and metadata
        deposit_id = self.create_simple_binary_deposit()
        deposit_id = self.update_binary_deposit(deposit_id,
                                                status_partial=False)

        self._check_deposit_ends_in(deposit_id, DEPOSIT_STATUS_READY)

    @istest
    def check_deposit_rejected(self):
        """Check an invalid deposit ready-for-checks should result in rejected

        """
        # 1. create a deposit whose archive is not a zip file
        deposit_id = self.create_invalid_deposit()

        self._check_deposit_ends_in(deposit_id, DEPOSIT_STATUS_REJECTED)
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
index dac48c5d..d31d8ba0 100644
--- a/swh/deposit/tests/loader/test_loader.py
+++ b/swh/deposit/tests/loader/test_loader.py
@@ -1,286 +1,289 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import unittest
import shutil
from nose.tools import istest
from nose.plugins.attrib import attr
from rest_framework.test import APITestCase
from swh.model import hashutil
from swh.deposit.loader import loader
from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
from swh.deposit.config import PRIVATE_PUT_DEPOSIT
from django.core.urlresolvers import reverse
from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG
from .. import TEST_LOADER_CONFIG
from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine
from ..common import FileSystemCreationRoutine
TOOL_ID = 99
PROVIDER_ID = 12
class DepositLoaderInhibitsStorage:
    """Mixin class to inhibit the persistence and keep in memory the data
    sent for storage.

    cf. SWHDepositLoaderNoStorage

    """
    def __init__(self, client=None):
        # client is not used here, transit it nonetheless to other mixins
        super().__init__(client=client)
        # one list per type of object sent for storage
        object_types = (
            'origin', 'origin_visit', 'origin_metadata', 'content',
            'directory', 'revision', 'release', 'occurrence', 'tool',
            'provider',
        )
        self.state = {object_type: [] for object_type in object_types}

    def _add(self, type, l):
        """Add without duplicates and keeping the insertion order.

        Args:
            type (str): Type of objects concerned by the action
            l ([object]): List of 'type' object

        """
        known = self.state[type]
        for candidate in l:
            if candidate not in known:
                known.append(candidate)

    def send_origin(self, origin):
        """Keep the origin (updated in place with id 1), return its id."""
        origin.update(id=1)
        self._add('origin', [origin])
        return origin['id']

    def send_origin_visit(self, origin_id, visit_date):
        """Keep and return a visit (numbered 1) of the origin."""
        visit = dict(origin=origin_id, visit_date=visit_date, visit=1)
        self._add('origin_visit', [visit])
        return visit

    def send_origin_metadata(self, origin_id, visit_date, provider_id, tool_id,
                             metadata):
        """Keep and return the origin metadata built out of the arguments."""
        entry = dict(
            origin_id=origin_id,
            visit_date=visit_date,
            provider_id=provider_id,
            tool_id=tool_id,
            metadata=metadata)
        self._add('origin_metadata', [entry])
        return entry

    def send_tool(self, tool):
        """Keep the tool's name, version and configuration, return TOOL_ID."""
        kept_keys = ('tool_name', 'tool_version', 'tool_configuration')
        self._add('tool', [{key: tool[key] for key in kept_keys}])
        return TOOL_ID

    def send_provider(self, provider):
        """Keep the provider's name, type, url and metadata, return
        PROVIDER_ID.

        """
        kept_keys = ('provider_name', 'provider_type', 'provider_url',
                     'metadata')
        self._add('provider', [{key: provider[key] for key in kept_keys}])
        return PROVIDER_ID

    def maybe_load_contents(self, contents):
        self._add('content', contents)

    def maybe_load_directories(self, directories):
        self._add('directory', directories)

    def maybe_load_revisions(self, revisions):
        self._add('revision', revisions)

    def maybe_load_releases(self, releases):
        self._add('release', releases)

    def maybe_load_occurrences(self, occurrences):
        self._add('occurrence', occurrences)

    def open_fetch_history(self):
        pass

    def close_fetch_history_failure(self, fetch_history_id):
        pass

    def close_fetch_history_success(self, fetch_history_id):
        pass

    def update_origin_visit(self, origin_id, visit, status):
        """Only remember the status the visit was updated with."""
        self.status = status

    # Override to do nothing at the end
    def close_failure(self):
        pass

    def close_success(self):
        pass
class TestLoaderUtils(unittest.TestCase):
    """Assertion helpers on what a loader kept in its state."""

    def assertRevisionsOk(self, expected_revisions):
        """Check the loader's revisions match the expected revisions.

        Expects self.loader to be instantiated and ready to be
        inspected (meaning the loading took place).

        Args:
            expected_revisions (dict): Dict with key revision id,
            value the targeted directory id.

        """
        for rev in self.loader.state['revision']:
            rev_id = hashutil.hash_to_hex(rev['id'])
            directory_id = hashutil.hash_to_hex(rev['directory'])

            # report an unexpected revision as a test failure rather
            # than a KeyError
            self.assertIn(rev_id, expected_revisions)
            self.assertEqual(expected_revisions[rev_id], directory_id)
class SWHDepositLoaderNoStorage(DepositLoaderInhibitsStorage,
                                loader.DepositLoader):
    """Loader under test.

    Being a loader.DepositLoader, it exercises the actual deposit
    loader. DepositLoaderInhibitsStorage comes first in the bases so
    that what is sent for storage is kept in memory instead.

    """
@attr('fs')
class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase,
                                BasicTestCase, CommonCreationRoutine,
                                FileSystemCreationRoutine, TestLoaderUtils):
    """Scenarios loading a deposit with a loader that persists nothing.

    """
    def setUp(self):
        super().setUp()

        # create the extraction dir used by the loader
        os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True)

        # 1. create a deposit with archive
        self.deposit_id = self.create_simple_binary_deposit()
        # 2. Sets a basic client which accesses the test data
        loader_client = SWHDepositTestClient(self.client,
                                             config=CLIENT_TEST_CONFIG)
        # 3. setup loader with no persistence and that client
        self.loader = SWHDepositLoaderNoStorage(client=loader_client)

    def tearDown(self):
        super().tearDown()
        shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir'])

    def _load_deposit(self, deposit_id):
        """Run the loader on the private urls of the given deposit."""
        args = [self.collection.name, deposit_id]
        self.loader.load(
            archive_url=reverse(PRIVATE_GET_RAW_CONTENT, args=args),
            deposit_meta_url=reverse(PRIVATE_GET_DEPOSIT_METADATA,
                                     args=args),
            deposit_update_url=reverse(PRIVATE_PUT_DEPOSIT, args=args))

    def _assert_loaded_objects(self):
        """Check how many objects of each kind the loader sent."""
        expected_counts = [
            ('content', 1),
            ('directory', 1),
            ('revision', 1),
            ('release', 0),
            ('occurrence', 1),
        ]
        for object_type, expected_count in expected_counts:
            self.assertEqual(
                len(self.loader.state[object_type]), expected_count)

    @istest
    def inject_deposit_ready(self):
        """Load a deposit which is ready

        """
        # when
        self._load_deposit(self.deposit_id)

        # then
        self._assert_loaded_objects()

    @istest
    def inject_deposit_verify_metadata(self):
        """Load a deposit with metadata, test metadata integrity

        """
        self.deposit_metadata_id = self.add_metadata_to_deposit(
            self.deposit_id)

        # when
        self._load_deposit(self.deposit_metadata_id)

        # then
        self._assert_loaded_objects()
        self.assertEqual(len(self.loader.state['origin_metadata']), 1)
        self.assertEqual(len(self.loader.state['tool']), 1)
        self.assertEqual(len(self.loader.state['provider']), 1)

        atom = '{http://www.w3.org/2005/Atom}'
        codemeta = '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}'
        expected_origin_metadata = {
            atom + 'author': {
                atom + 'email': 'hal@ccsd.cnrs.fr',
                atom + 'name': 'HAL'
            },
            codemeta + 'url':
                'https://hal-test.archives-ouvertes.fr/hal-01243065',
            codemeta + 'runtimePlatform': 'phpstorm',
            codemeta + 'license': {
                codemeta + 'name':
                    'CeCILL Free Software License Agreement v1.1'
            },
            codemeta + 'author': {
                codemeta + 'name': 'Morane Gruenpeter'
            },
            codemeta + 'programmingLanguage': 'C',
            codemeta + 'applicationCategory': 'test',
            codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00',
            codemeta + 'version': 1,
            atom + 'external_identifier': 'hal-01243065',
            atom + 'title': 'Composing a Web of Audio Applications',
            codemeta + 'description': 'this is the description',
            atom + 'id': 'hal-01243065',
            atom + 'client': 'hal',
            codemeta + 'keywords': 'DSP programming,Web',
            codemeta + 'developmentStatus': 'stable'
        }
        result = self.loader.state['origin_metadata'][0]
        self.assertEqual(result['metadata'], expected_origin_metadata)
        self.assertEqual(result['tool_id'], TOOL_ID)
        self.assertEqual(result['provider_id'], PROVIDER_ID)