def __init__(self):
    """Set up the metadata provider and tool descriptions from the config.

    ``provider`` is taken as a shallow copy of the configured dict because
    its ``provider_name`` and ``provider_url`` keys are overwritten with
    the deposit client's values when a deposit is read; copying keeps
    those per-deposit values out of the shared configuration.  ``tool`` is
    only read, so it is referenced as-is.

    """
    super().__init__()
    # Copy: the top-level keys of this dict are reassigned per deposit.
    self.provider = dict(self.config['provider'])
    self.tool = self.config['tool']
def store_metadata(self):
    """Store the origin_metadata during the load process.

    Reads the provider, tool and metadata entries from
    ``self.origin_metadata`` and passes them, together with
    ``self.origin_id`` and ``self.visit_date``, to
    ``self.send_origin_metadata``.

    An exception raised while sending is logged through ``self.log`` and
    not propagated.  Only ``Exception`` subclasses are handled this way,
    so ``KeyboardInterrupt`` and ``SystemExit`` still propagate.

    """
    origin_id = self.origin_id
    visit_date = self.visit_date
    provider = self.origin_metadata['provider']
    tool = self.origin_metadata['tool']
    metadata = self.origin_metadata['metadata']
    try:
        self.send_origin_metadata(origin_id, visit_date, provider,
                                  tool, metadata)
    except Exception:
        self.log.exception('Problem when storing origin_metadata')
a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -37,8 +37,27 @@ expected_meta = { 'origin': { - 'url': 'some-external-id', - 'type': 'hal' + 'url': 'https://hal.archives-ouvertes.fr/some-external-id', + 'type': 'deposit' + }, + 'origin_metadata': { + 'metadata': { + '{http://www.w3.org/2005/Atom}external_identifier': + 'some-external-id' + }, + 'provider': { + 'provider_name': '', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal.archives-ouvertes.fr/', + 'metadata': {} + }, + 'tool': { + 'tool_name': 'swh-deposit', + 'tool_version': '0.0.1', + 'tool_configuration': { + 'sword_version': '2' + } + } }, 'revision': { 'synthetic': True, @@ -51,7 +70,10 @@ 'fullname': '', 'email': '', 'name': '' }, 'date': None, - 'metadata': {}, + 'metadata': { + '{http://www.w3.org/2005/Atom}external_identifier': + 'some-external-id' + }, 'type': 'tar' }, 'occurrence': { diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -16,7 +16,8 @@ from rest_framework import status from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI -from swh.deposit.models import DepositClient, DepositCollection +from swh.deposit.models import DepositClient, DepositCollection, Deposit +from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT @@ -97,7 +98,7 @@ super().tearDown() shutil.rmtree(self.root_path) - def create_simple_binary_deposit(self, status_partial=False): + def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', @@ -160,12 +161,14 @@ 
deposit_request_types[deposit_request_type] = drt _name = 'hal' + _url = 'https://hal.archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, - password=_name) + password=_name, + url=_url) _client.collections = [_collection.id] _client.save() @@ -225,6 +228,57 @@ anotherthing """ + self.atom_entry_data2 = b""" + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 1785io25c695 + 2017-10-07T15:17:08Z + some awesome author + """ + + self.codemeta_entry_data0 = b""" + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 1785io25c695 + 2017-10-07T15:17:08Z + some awesome author + description + key-word 1 + """ + + self.codemeta_entry_data1 = b""" + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + DSP programming,Web + 2017-05-03T16:08:47+02:00 + this is the description + 1 + phpstorm + stable + php + python + C + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + +""" + def create_deposit_with_status_rejected(self): url = reverse(COL_IRI, args=[self.collection.name]) @@ -272,6 +326,30 @@ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id + def create_deposit_partial_with_data_in_args(self, data): + """Create a simple deposit (1 request) in `partial` state with the data + or metadata as an argument and returns its new identifier. 
def add_metadata_to_deposit(self, deposit_id, status_partial=False):
    """Add metadata to an existing deposit.

    Posts ``self.codemeta_entry_data1`` as an atom entry on the deposit's
    edit endpoint, then checks that the deposit exists, that it has at
    least one deposit request, and that every request of type
    ``metadata`` carries non-empty metadata.

    Args:
        deposit_id: identifier of the deposit to update
        status_partial: value sent as the ``In-Progress`` header

    Returns:
        the deposit id given as input

    """
    # when
    response = self.client.post(
        reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
        content_type='application/atom+xml;type=entry',
        data=self.codemeta_entry_data1,
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial)
    assert response.status_code == status.HTTP_201_CREATED
    # then
    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit is not None

    deposit_requests = DepositRequest.objects.filter(deposit=deposit)
    # `is not []` compares identity with a fresh list and is always true;
    # ask the queryset whether it actually holds rows instead.
    assert deposit_requests.exists()

    for dr in deposit_requests:
        if dr.type.name == 'metadata':
            # Check the request being iterated (not always the first one)
            # and compare by value: `is not {}` can never fail.
            assert dr.metadata != {}
    return deposit_id
@istest
def inject_deposit_verify_metadata(self):
    """Load a deposit with metadata, test metadata integrity

    Adds metadata to ``self.deposit_id``, runs the loader against the
    private archive/metadata/update endpoints of that deposit, then checks
    the number of objects recorded in the loader state and the content of
    the single recorded origin_metadata entry.

    """
    self.deposit_metadata_id = self.add_metadata_to_deposit(
        self.deposit_id)
    args = [self.collection.name, self.deposit_metadata_id]

    archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
    deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
    deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)

    # when
    self.loader.load(archive_url=archive_url,
                     deposit_meta_url=deposit_meta_url,
                     deposit_update_url=deposit_update_url)

    # then
    # assertEqual rather than the deprecated assertEquals alias, which
    # was removed in Python 3.12.
    self.assertEqual(len(self.loader.state['content']), 1)
    self.assertEqual(len(self.loader.state['directory']), 1)
    self.assertEqual(len(self.loader.state['revision']), 1)
    self.assertEqual(len(self.loader.state['release']), 0)
    self.assertEqual(len(self.loader.state['occurrence']), 1)
    self.assertEqual(len(self.loader.state['origin_metadata']), 1)
    atom = '{http://www.w3.org/2005/Atom}'
    codemeta = '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}'
    expected_origin_metadata = {
        atom + 'author': {
            atom + 'email': 'hal@ccsd.cnrs.fr',
            atom + 'name': 'HAL'
        },
        codemeta + 'url':
        'https://hal-test.archives-ouvertes.fr/hal-01243065',
        codemeta + 'runtimePlatform': 'phpstorm',
        codemeta + 'license': {
            codemeta + 'name':
            'CeCILL Free Software License Agreement v1.1'
        },
        codemeta + 'programmingLanguage': 'C',
        codemeta + 'applicationCategory': 'test',
        codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00',
        codemeta + 'version': 1,
        atom + 'external_identifier': 'hal-01243065',
        atom + 'title': 'Composing a Web of Audio Applications',
        codemeta + 'description': 'this is the description',
        atom + 'id': 'hal-01243065',
        atom + 'client': 'hal',
        codemeta + 'keywords': 'DSP programming,Web',
        codemeta + 'developmentStatus': 'stable'
    }

    self.assertEqual(self.loader.state['origin_metadata'][0]['metadata'],
                     expected_origin_metadata)