diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -9,6 +9,7 @@
getting-started.md
spec-api.md
+ metadata.md
spec-injection.md
dev-info.md
sys-info.md
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -71,7 +71,7 @@
"""
ADDITIONAL_CONFIG = {
- 'extraction_dir': ('str', '/tmp/swh-deposit/archive/')
+ 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'),
}
def __init__(self):
@@ -120,6 +120,28 @@
"""Class in charge of aggregating metadata on a deposit.
"""
+ # Extra configuration entries of the metadata reader; each value is a
+ # (type, default) pair.
+ ADDITIONAL_CONFIG = {
+ # description of the metadata provider; 'provider_name' and
+ # 'provider_url' default to empty strings here
+ 'provider': ('dict', {
+ 'provider_name': '',
+ 'provider_type': 'deposit_client',
+ 'provider_url': '',
+ 'metadata': {
+ }
+ }),
+ # description of the tool which produced the metadata
+ 'tool': ('dict', {
+ 'tool_name': 'swh-deposit',
+ 'tool_version': '0.0.1',
+ 'tool_configuration': {
+ 'sword_version': '2'
+ }
+ })
+ }
+
+    def __init__(self):
+        """Initialize the reader and keep its own `provider` and `tool`.
+
+        Both entries are read from the configuration and copied: the
+        provider's name and url get reassigned on this instance when a
+        deposit is read, and those writes must not end up in the
+        configuration dicts (which may be the class-level defaults).
+
+        """
+        super().__init__()
+        # shallow copies suffice, only top-level keys are reassigned later
+        self.provider = dict(self.config['provider'])
+        self.tool = dict(self.config['tool'])
+
def _aggregate_metadata(self, deposit, metadata_requests):
"""Retrieve and aggregates metadata information.
@@ -143,15 +165,14 @@
"""
data = {}
- metadata_requests = []
# Retrieve tarballs/metadata information
- metadata = self._aggregate_metadata(deposit, metadata_requests)
+ metadata = self._aggregate_metadata(deposit, requests)
# Read information metadata
data['origin'] = {
- 'type': deposit.collection.name,
- 'url': deposit.external_id,
+ 'type': 'deposit',
+ 'url': deposit.client.url + deposit.external_id,
}
# revision
@@ -163,6 +184,10 @@
'email': deposit.client.email,
}
+ # metadata provider
+ self.provider['provider_name'] = deposit.client.last_name
+ self.provider['provider_url'] = deposit.client.url
+
revision_type = 'tar'
revision_msg = '%s: Deposit %s in collection %s' % (
fullname, deposit.id, deposit.collection.name)
@@ -188,6 +213,11 @@
data['occurrence'] = {
'branch': 'master'
}
+ data['origin_metadata'] = {
+ 'provider': self.provider,
+ 'tool': self.tool,
+ 'metadata': metadata
+ }
return data
diff --git a/swh/deposit/injection/loader.py b/swh/deposit/injection/loader.py
--- a/swh/deposit/injection/loader.py
+++ b/swh/deposit/injection/loader.py
@@ -135,14 +135,34 @@
visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
revision = metadata['revision']
occurrence = metadata['occurrence']
+ self.origin_metadata = metadata['origin_metadata']
self.client.update_deposit_status(deposit_update_url, 'injecting')
+
super().prepare(tar_path=archive,
origin=origin,
visit_date=visit_date,
revision=revision,
occurrences=[occurrence])
+    def store_metadata(self):
+        """Store the origin_metadata during the loading process.
+
+        Reads the provider, tool and metadata out of the
+        `origin_metadata` entry kept on the loader and hands them,
+        together with the current origin id and visit date, to
+        `send_origin_metadata`.
+
+        This step is best effort: a failure while sending is logged and
+        not re-raised.
+
+        """
+        origin_id = self.origin_id
+        visit_date = self.visit_date
+        provider = self.origin_metadata['provider']
+        tool = self.origin_metadata['tool']
+        metadata = self.origin_metadata['metadata']
+        try:
+            self.send_origin_metadata(origin_id, visit_date, provider,
+                                      tool, metadata)
+        except Exception:
+            # not a bare `except:` so that KeyboardInterrupt and SystemExit
+            # still propagate
+            self.log.exception('Problem when storing origin_metadata')
+
+
def post_load(self, success=True):
"""Updating the deposit's status according to its loading status.
diff --git a/swh/deposit/migrations/0006_depositclient_url.py b/swh/deposit/migrations/0006_depositclient_url.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/migrations/0006_depositclient_url.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.10.7 on 2017-11-07 13:12
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+# Schema migration adding the `url` text field to the `depositclient` model.
+class Migration(migrations.Migration):
+
+ # depends on the previous migration of the `deposit` app
+ dependencies = [
+ ('deposit', '0005_auto_20171019_1436'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='depositclient',
+ name='url',
+ # the HAL url is the default value given to the new field
+ field=models.TextField(default='https://hal.archives-ouvertes.fr/'),
+ # NOTE(review): per the Django docs, preserve_default=False means the
+ # default above is only used by this migration and is not kept in
+ # the resulting model state -- confirm this is the intent
+ preserve_default=False,
+ ),
+ ]
diff --git a/swh/deposit/models.py b/swh/deposit/models.py
--- a/swh/deposit/models.py
+++ b/swh/deposit/models.py
@@ -72,6 +72,7 @@
"""
collections = ArrayField(models.IntegerField(), null=True)
objects = UserManager()
+ url = models.TextField(null=False)
class Meta:
db_table = 'deposit_client'
diff --git a/swh/deposit/tests/__init__.py b/swh/deposit/tests/__init__.py
--- a/swh/deposit/tests/__init__.py
+++ b/swh/deposit/tests/__init__.py
@@ -12,6 +12,20 @@
TEST_CONFIG = {
'max_upload_size': 500,
'extraction_dir': '/tmp/swh-deposit/test/extraction-dir',
+ # provider and tool entries used by the tests; the values are the same
+ # as the ADDITIONAL_CONFIG defaults added in api/private/deposit_read.py
+ 'provider': {
+ 'provider_name': '',
+ 'provider_type': 'deposit_client',
+ 'provider_url': '',
+ 'metadata': {
+ }
+ },
+ 'tool': {
+ 'tool_name': 'swh-deposit',
+ 'tool_version': '0.0.1',
+ 'tool_configuration': {
+ 'sword_version': '2'
+ }
+ }
}
diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_read_metadata.py
@@ -37,8 +37,27 @@
expected_meta = {
'origin': {
- 'url': 'some-external-id',
- 'type': 'hal'
+ 'url': 'https://hal.archives-ouvertes.fr/some-external-id',
+ 'type': 'deposit'
+ },
+ 'origin_metadata': {
+ 'metadata': {
+ '{http://www.w3.org/2005/Atom}external_identifier':
+ 'some-external-id'
+ },
+ 'provider': {
+ 'provider_name': '',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal.archives-ouvertes.fr/',
+ 'metadata': {}
+ },
+ 'tool': {
+ 'tool_name': 'swh-deposit',
+ 'tool_version': '0.0.1',
+ 'tool_configuration': {
+ 'sword_version': '2'
+ }
+ }
},
'revision': {
'synthetic': True,
@@ -51,7 +70,10 @@
'fullname': '', 'email': '', 'name': ''
},
'date': None,
- 'metadata': {},
+ 'metadata': {
+ '{http://www.w3.org/2005/Atom}external_identifier':
+ 'some-external-id'
+ },
'type': 'tar'
},
'occurrence': {
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -16,7 +16,8 @@
from rest_framework import status
from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI
-from swh.deposit.models import DepositClient, DepositCollection
+from swh.deposit.models import DepositClient, DepositCollection, Deposit
+from swh.deposit.models import DepositRequest
from swh.deposit.models import DepositRequestType
from swh.deposit.parsers import parse_xml
from swh.deposit.settings.testing import MEDIA_ROOT
@@ -97,7 +98,7 @@
super().tearDown()
shutil.rmtree(self.root_path)
- def create_simple_binary_deposit(self, status_partial=False):
+ def create_simple_binary_deposit(self, status_partial=True):
response = self.client.post(
reverse(COL_IRI, args=[self.collection.name]),
content_type='application/zip',
@@ -160,12 +161,14 @@
deposit_request_types[deposit_request_type] = drt
_name = 'hal'
+ _url = 'https://hal.archives-ouvertes.fr/'
# set collection up
_collection = DepositCollection(name=_name)
_collection.save()
# set user/client up
_client = DepositClient.objects.create_user(username=_name,
- password=_name)
+ password=_name,
+ url=_url)
_client.collections = [_collection.id]
_client.save()
@@ -225,6 +228,57 @@
anotherthing
"""
+ self.atom_entry_data2 = b"""
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 1785io25c695
+ 2017-10-07T15:17:08Z
+ some awesome author
+ """
+
+ self.codemeta_entry_data0 = b"""
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 1785io25c695
+ 2017-10-07T15:17:08Z
+ some awesome author
+ description
+ key-word 1
+ """
+
+ self.codemeta_entry_data1 = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming,Web
+ 2017-05-03T16:08:47+02:00
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+"""
+
def create_deposit_with_status_rejected(self):
url = reverse(COL_IRI, args=[self.collection.name])
@@ -272,6 +326,30 @@
'{http://www.w3.org/2005/Atom}deposit_id']
return deposit_id
+    def create_deposit_partial_with_data_in_args(self, data):
+        """Post `data` as an atom entry on the collection to open a new
+        deposit (1 request) in `partial` state.
+
+        Args:
+            data: atom entry
+
+        Returns:
+            deposit id
+
+        """
+        collection_url = reverse(COL_IRI, args=[self.collection.name])
+        resp = self.client.post(
+            collection_url,
+            content_type='application/atom+xml;type=entry',
+            data=data,
+            HTTP_SLUG='external-id',
+            HTTP_IN_PROGRESS='true')
+        assert resp.status_code == status.HTTP_201_CREATED
+
+        # the new deposit's identifier is part of the parsed response body
+        parsed = parse_xml(BytesIO(resp.content))
+        return parsed['{http://www.w3.org/2005/Atom}deposit_id']
+
+
def _update_deposit_with_status(self, deposit_id, status_partial=False):
"""Add to a given deposit another archive and update its current
status to `ready` (by default).
@@ -308,3 +386,27 @@
deposit_id = self._update_deposit_with_status(
deposit_id, status_partial=True)
return deposit_id
+
+    def add_metadata_to_deposit(self, deposit_id, status_partial=False):
+        """Add metadata to deposit.
+
+        Posts `self.codemeta_entry_data1` as an atom entry on the deposit,
+        then checks that the deposit has requests and that each of its
+        requests of type `metadata` holds non-empty metadata.
+
+        Args:
+            deposit_id: identifier of the deposit to add metadata to
+            status_partial: value sent as the In-Progress header
+
+        Returns:
+            deposit id
+
+        """
+        # when
+        response = self.client.post(
+            reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
+            content_type='application/atom+xml;type=entry',
+            data=self.codemeta_entry_data1,
+            HTTP_SLUG='external-id',
+            HTTP_IN_PROGRESS=status_partial)
+        assert response.status_code == status.HTTP_201_CREATED
+        # then
+        deposit = Deposit.objects.get(pk=deposit_id)
+        assert deposit is not None
+
+        deposit_requests = DepositRequest.objects.filter(deposit=deposit)
+        # an identity check against a fresh `[]` is always true; check that
+        # rows actually exist
+        assert deposit_requests.exists()
+
+        for dr in deposit_requests:
+            if dr.type.name == 'metadata':
+                # look at the current request (not always the first one)
+                # and compare by value rather than by identity
+                assert dr.metadata != {}
+        return deposit_id
diff --git a/swh/deposit/tests/test_loader.py b/swh/deposit/tests/test_loader.py
--- a/swh/deposit/tests/test_loader.py
+++ b/swh/deposit/tests/test_loader.py
@@ -38,6 +38,7 @@
self.state = {
'origin': [],
'origin_visit': [],
+ 'origin_metadata': [],
'content': [],
'directory': [],
'revision': [],
@@ -73,6 +74,18 @@
self._add('origin_visit', [origin_visit])
return origin_visit
+    def send_origin_metadata(self, origin_id, visit_date, provider, tool,
+                             metadata):
+        """Register an origin_metadata entry built from the arguments
+        under the 'origin_metadata' key and return that entry.
+
+        """
+        entry = dict(
+            origin_id=origin_id,
+            visit_date=visit_date,
+            provider=provider,
+            tool=tool,
+            metadata=metadata,
+        )
+        self._add('origin_metadata', [entry])
+        return entry
+
def maybe_load_contents(self, contents):
self._add('content', contents)
@@ -171,7 +184,8 @@
def get_metadata(self, metadata_url, log=None):
r = me.client.get(metadata_url)
- return json.loads(r.content.decode('utf-8'))
+ data = json.loads(r.content.decode('utf-8'))
+ return data
def update_deposit_status(self, update_status_url, status,
revision_id=None):
@@ -218,3 +232,58 @@
# FIXME enrich state introspection
# expected_revisions = {}
# self.assertRevisionsOk(expected_revisions)
+
+    @istest
+    def inject_deposit_verify_metadata(self):
+        """Load a deposit with metadata, test metadata integrity
+
+        Adds metadata to the deposit, runs the loader on it, then checks
+        the number of objects recorded in the loader's state and the
+        content of the single origin_metadata entry.
+
+        """
+        self.deposit_metadata_id = self.add_metadata_to_deposit(
+            self.deposit_id)
+        args = [self.collection.name, self.deposit_metadata_id]
+
+        archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
+        deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
+        deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
+
+        # when
+        self.loader.load(archive_url=archive_url,
+                         deposit_meta_url=deposit_meta_url,
+                         deposit_update_url=deposit_update_url)
+
+        # then
+        # assertEqual: the assertEquals alias is deprecated and no longer
+        # exists in recent Python versions
+        self.assertEqual(len(self.loader.state['content']), 1)
+        self.assertEqual(len(self.loader.state['directory']), 1)
+        self.assertEqual(len(self.loader.state['revision']), 1)
+        self.assertEqual(len(self.loader.state['release']), 0)
+        self.assertEqual(len(self.loader.state['occurrence']), 1)
+        self.assertEqual(len(self.loader.state['origin_metadata']), 1)
+        # namespace prefixes of the expected metadata keys
+        atom = '{http://www.w3.org/2005/Atom}'
+        codemeta = '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}'
+        expected_origin_metadata = {
+            atom + 'author': {
+                atom + 'email': 'hal@ccsd.cnrs.fr',
+                atom + 'name': 'HAL'
+            },
+            codemeta + 'url':
+            'https://hal-test.archives-ouvertes.fr/hal-01243065',
+            codemeta + 'runtimePlatform': 'phpstorm',
+            codemeta + 'license': {
+                codemeta + 'name':
+                'CeCILL Free Software License Agreement v1.1'
+            },
+            codemeta + 'programmingLanguage': 'C',
+            codemeta + 'applicationCategory': 'test',
+            codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00',
+            codemeta + 'version': 1,
+            atom + 'external_identifier': 'hal-01243065',
+            atom + 'title': 'Composing a Web of Audio Applications',
+            codemeta + 'description': 'this is the description',
+            atom + 'id': 'hal-01243065',
+            atom + 'client': 'hal',
+            codemeta + 'keywords': 'DSP programming,Web',
+            codemeta + 'developmentStatus': 'stable'
+        }
+
+        self.assertEqual(self.loader.state['origin_metadata'][0]['metadata'],
+                         expected_origin_metadata)