diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
index 6d28f106..04e63692 100644
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -1,209 +1,250 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import os
import shutil
import tempfile
from contextlib import contextmanager
+from dateutil import parser
from django.http import FileResponse
from rest_framework import status
from swh.core import tarball
from swh.model import identifiers
from . import DepositReadMixin
from ...config import SWH_PERSON, ARCHIVE_TYPE
from ..common import SWHGetDepositAPI, SWHPrivateAPIView
from ...models import Deposit
@contextmanager
def aggregate_tarballs(extraction_dir, archive_paths):
"""Aggregate multiple tarballs into one and returns this new archive's
path.
Args:
extraction_dir (path): Path to use for the tarballs computation
archive_paths ([str]): Deposit's archive paths
Returns:
Tuple (directory to clean up, archive path (aggregated or not))
"""
if len(archive_paths) > 1:
# need to rebuild one archive from multiple ones
os.makedirs(extraction_dir, 0o755, exist_ok=True)
dir_path = tempfile.mkdtemp(prefix='swh.deposit-',
dir=extraction_dir)
# root folder to build an aggregated tarball
aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate')
os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True)
# uncompress in a temporary location all archives
for archive_path in archive_paths:
tarball.uncompress(archive_path, aggregated_tarball_rootdir)
# Aggregate into one big tarball the multiple smaller ones
temp_tarpath = tarball.compress(
aggregated_tarball_rootdir + '.zip',
nature='zip',
dirpath_or_files=aggregated_tarball_rootdir)
# can already clean up temporary directory
shutil.rmtree(aggregated_tarball_rootdir)
try:
yield temp_tarpath
finally:
shutil.rmtree(dir_path)
else: # only 1 archive, no need to do fancy actions (and no cleanup step)
yield archive_paths[0]
class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView,
DepositReadMixin):
"""Dedicated class to read a deposit's raw archives content.
Only GET is supported.
"""
ADDITIONAL_CONFIG = {
'extraction_dir': ('str', '/tmp/swh-deposit/archive/'),
}
def __init__(self):
super().__init__()
self.extraction_dir = self.config['extraction_dir']
if not os.path.exists(self.extraction_dir):
os.makedirs(self.extraction_dir)
def process_get(self, req, collection_name, deposit_id):
"""Build a unique tarball from the multiple received and stream that
content to the client.
Args:
req (Request):
collection_name (str): Collection owning the deposit
deposit_id (id): Deposit concerned by the reading
Returns:
Tuple status, stream of content, content-type
"""
archive_paths = [r.archive.path for r in self._deposit_requests(
deposit_id, request_type=ARCHIVE_TYPE)]
with aggregate_tarballs(self.extraction_dir, archive_paths) as path:
return FileResponse(open(path, 'rb'),
status=status.HTTP_200_OK,
content_type='application/octet-stream')
class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView,
DepositReadMixin):
"""Class in charge of aggregating metadata on a deposit.
"""
ADDITIONAL_CONFIG = {
'provider': ('dict', {
# 'provider_name': '', # those are not set since read from the
# 'provider_url': '', # deposit's client
'provider_type': 'deposit_client',
'metadata': {}
}),
'tool': ('dict', {
'name': 'swh-deposit',
'version': '0.0.1',
'configuration': {
'sword_version': '2'
}
})
}
def __init__(self):
super().__init__()
self.provider = self.config['provider']
self.tool = self.config['tool']
def _retrieve_url(self, deposit, metadata):
client_domain = deposit.client.domain
for field in metadata:
if 'url' in field:
if client_domain in metadata[field]:
return metadata[field]
+ def _prepare_date(self, date):
+        """Normalize a date into swh's timestamp dict format; a list
+        uses its first element, a string is parsed with dateutil.
+        """
+ if isinstance(date, list):
+ date = date[0]
+ if isinstance(date, str):
+ date = parser.parse(date)
+
+ return identifiers.normalize_timestamp(date)
+
+ def _compute_date(self, deposit, metadata):
+ """Compute the date to use as a tuple of author date, committer date.
+        Each of those dates is swh-normalized immediately.
+
+ Args:
+ deposit (Deposit): Deposit model representation
+ metadata (Dict): Metadata dict representation
+
+ Returns:
+ Tuple of author date, committer date. Those dates are
+ swh normalized.
+
+ """
+ commit_date = metadata.get('codemeta:datePublished')
+ author_date = metadata.get('codemeta:dateCreated')
+
+ if author_date and commit_date:
+ t = (author_date, commit_date)
+ elif commit_date:
+ t = (commit_date, commit_date)
+ elif author_date:
+ t = (author_date, author_date)
+ else:
+ date = deposit.complete_date
+ t = (date, date)
+ return (
+ self._prepare_date(t[0]), self._prepare_date(t[1]))
+
def metadata_read(self, deposit):
"""Read and aggregate multiple data on deposit into one unified data
dictionary.
Args:
deposit (Deposit): Deposit concerned by the data aggregation.
Returns:
Dictionary of data representing the deposit to inject in swh.
"""
data = {}
metadata = self._metadata_get(deposit)
# create origin_url from metadata only after deposit_check validates it
origin_url = self._retrieve_url(deposit, metadata)
# Read information metadata
data['origin'] = {
'type': 'deposit',
'url': origin_url
}
# revision
fullname = deposit.client.username
author_committer = SWH_PERSON
# metadata provider
self.provider['provider_name'] = deposit.client.last_name
self.provider['provider_url'] = deposit.client.provider_url
revision_type = 'tar'
revision_msg = '%s: Deposit %s in collection %s' % (
fullname, deposit.id, deposit.collection.name)
- complete_date = identifiers.normalize_timestamp(deposit.complete_date)
+
+ author_date, commit_date = self._compute_date(deposit, metadata)
data['revision'] = {
'synthetic': True,
- 'date': complete_date,
- 'committer_date': complete_date,
+ 'date': author_date,
+ 'committer_date': commit_date,
'author': author_committer,
'committer': author_committer,
'type': revision_type,
'message': revision_msg,
'metadata': metadata,
}
if deposit.parent:
swh_persistent_id = deposit.parent.swh_id
persistent_identifier = identifiers.parse_persistent_identifier(
swh_persistent_id)
parent_revision = persistent_identifier.object_id
data['revision']['parents'] = [parent_revision]
data['branch_name'] = 'master'
data['origin_metadata'] = {
'provider': self.provider,
'tool': self.tool,
'metadata': metadata
}
return data
def process_get(self, req, collection_name, deposit_id):
deposit = Deposit.objects.get(pk=deposit_id)
data = self.metadata_read(deposit)
d = {}
if data:
d = json.dumps(data)
return status.HTTP_200_OK, d, 'application/json'
diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py
index e35200bd..dcfbdaf9 100644
--- a/swh/deposit/tests/api/test_deposit_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_read_metadata.py
@@ -1,205 +1,713 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase
from swh.deposit.models import Deposit
from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS
from swh.deposit.config import DEPOSIT_STATUS_PARTIAL
from ...config import SWH_PERSON
from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine
class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase,
CommonCreationRoutine):
"""Deposit access to read metadata information on deposit.
"""
def test_read_metadata(self):
"""Private metadata read api to existing deposit should return metadata
"""
deposit_id = self.create_deposit_partial()
url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
args=[self.collection.name, deposit_id])
response = self.client.get(url)
self.assertEqual(response.status_code,
status.HTTP_200_OK)
self.assertEqual(response._headers['content-type'][1],
'application/json')
data = response.json()
expected_meta = {
'origin': {
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id',
'type': 'deposit'
},
'origin_metadata': {
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
},
'provider': {
'provider_name': 'hal',
'provider_type': 'deposit_client',
'provider_url': 'https://hal-test.archives-ouvertes.fr/',
'metadata': {}
},
'tool': {
'name': 'swh-deposit',
'version': '0.0.1',
'configuration': {
'sword_version': '2'
}
}
},
'revision': {
'synthetic': True,
- 'committer_date': None,
+ 'committer_date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'message': 'hal: Deposit %s in collection hal' % deposit_id,
'author': SWH_PERSON,
'committer': SWH_PERSON,
- 'date': None,
+ 'date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
},
'type': 'tar'
},
'branch_name': 'master',
}
self.assertEqual(data, expected_meta)
def test_read_metadata_revision_with_parent(self):
"""Private read metadata to a deposit (with parent) returns metadata
"""
swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa'
swh_persistent_id = 'swh:1:rev:%s' % swh_id
deposit_id1 = self.create_deposit_with_status(
status=DEPOSIT_STATUS_LOAD_SUCCESS,
external_id='some-external-id',
swh_id=swh_persistent_id)
deposit_parent = Deposit.objects.get(pk=deposit_id1)
self.assertEqual(deposit_parent.swh_id, swh_persistent_id)
self.assertEqual(deposit_parent.external_id, 'some-external-id')
self.assertEqual(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS)
deposit_id = self.create_deposit_partial(
external_id='some-external-id')
deposit = Deposit.objects.get(pk=deposit_id)
self.assertEqual(deposit.external_id, 'some-external-id')
self.assertEqual(deposit.swh_id, None)
self.assertEqual(deposit.parent, deposit_parent)
self.assertEqual(deposit.status, DEPOSIT_STATUS_PARTIAL)
url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
args=[self.collection.name, deposit_id])
response = self.client.get(url)
self.assertEqual(response.status_code,
status.HTTP_200_OK)
self.assertEqual(response._headers['content-type'][1],
'application/json')
data = response.json()
expected_meta = {
'origin': {
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id',
'type': 'deposit'
},
'origin_metadata': {
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
},
'provider': {
'provider_name': 'hal',
'provider_type': 'deposit_client',
'provider_url': 'https://hal-test.archives-ouvertes.fr/',
'metadata': {}
},
'tool': {
'name': 'swh-deposit',
'version': '0.0.1',
'configuration': {
'sword_version': '2'
}
}
},
'revision': {
'synthetic': True,
- 'date': None,
- 'committer_date': None,
+ 'date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
+ 'committer_date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'author': SWH_PERSON,
'committer': SWH_PERSON,
'type': 'tar',
'message': 'hal: Deposit %s in collection hal' % deposit_id,
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
},
'parents': [swh_id]
},
'branch_name': 'master',
}
self.assertEqual(data, expected_meta)
+ def test_read_metadata_3(self):
+ """dateCreated/datePublished provided, revision uses author/committer date
+
+ """
+ # add metadata to the deposit with datePublished and dateCreated
+ codemeta_entry_data = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming,Web
+ 2015-04-06T17:08:47+02:00
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+ 2017-05-03T16:08:47+02:00
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+""" # noqa
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00',
+ 'codemeta:datePublished': '2017-05-03T16:08:47+02:00',
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming,Web',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1493820527
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1428332927
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
+ def test_read_metadata_4(self):
+ """dateCreated/datePublished not provided, revision uses complete_date
+
+ """
+        # add metadata to the deposit without datePublished and dateCreated
+ codemeta_entry_data = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+""" # noqa
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ # will use the deposit completed date as fallback date
+ deposit = Deposit.objects.get(pk=deposit_id)
+ deposit.complete_date = '2016-04-06'
+ deposit.save()
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 0,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1459900800
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 0,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1459900800
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
+ def test_read_metadata_5(self):
+ """dateCreated/datePublished provided, revision uses author/committer
+ date
+
+        If multiple dateCreated (resp. datePublished) values are
+        provided, the first occurrence of each is selected.
+
+ """
+ # add metadata to the deposit with datePublished and dateCreated
+ codemeta_entry_data = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming,Web
+ 2015-04-06T17:08:47+02:00
+ 2016-04-06T17:08:47+02:00
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+ 2017-05-03T16:08:47+02:00
+ 2018-05-03T16:08:47+02:00
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+""" # noqa
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:dateCreated': [
+ '2015-04-06T17:08:47+02:00',
+ '2016-04-06T17:08:47+02:00',
+ ],
+ 'codemeta:datePublished': [
+ '2017-05-03T16:08:47+02:00',
+ '2018-05-03T16:08:47+02:00',
+ ],
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming,Web',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1493820527
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1428332927
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
def test_access_to_nonexisting_deposit_returns_404_response(self):
"""Read unknown collection should return a 404 response
"""
unknown_id = '999'
url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
args=[self.collection.name, unknown_id])
response = self.client.get(url)
self.assertEqual(response.status_code,
status.HTTP_404_NOT_FOUND)
self.assertIn('Deposit with id %s does not exist' % unknown_id,
response.content.decode('utf-8'))
def test_access_to_nonexisting_collection_returns_404_response(self):
"""Read unknown deposit should return a 404 response
"""
collection_name = 'non-existing'
deposit_id = self.create_deposit_partial()
url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
args=[collection_name, deposit_id])
response = self.client.get(url)
self.assertEqual(response.status_code,
status.HTTP_404_NOT_FOUND)
self.assertIn('Unknown collection name %s' % collection_name,
response.content.decode('utf-8'),)
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
index 5c8b70fa..ab56c451 100644
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -1,564 +1,565 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import base64
import hashlib
import os
import shutil
import tarfile
import tempfile
from django.urls import reverse
from django.test import TestCase
from io import BytesIO
import pytest
from rest_framework import status
from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI,
DEPOSIT_STATUS_PARTIAL,
DEPOSIT_STATUS_VERIFIED,
DEPOSIT_STATUS_REJECTED,
DEPOSIT_STATUS_DEPOSITED)
from swh.deposit.models import DepositClient, DepositCollection, Deposit
from swh.deposit.models import DepositRequest
from swh.deposit.parsers import parse_xml
from swh.deposit.settings.testing import MEDIA_ROOT
from swh.core import tarball
def compute_info(archive_path):
"""Given a path, compute information on path.
"""
with open(archive_path, 'rb') as f:
length = 0
sha1sum = hashlib.sha1()
md5sum = hashlib.md5()
data = b''
for chunk in f:
sha1sum.update(chunk)
md5sum.update(chunk)
length += len(chunk)
data += chunk
return {
'dir': os.path.dirname(archive_path),
'name': os.path.basename(archive_path),
'path': archive_path,
'length': length,
'sha1sum': sha1sum.hexdigest(),
'md5sum': md5sum.hexdigest(),
'data': data
}
def _compress(path, extension, dir_path):
"""Compress path according to extension
"""
if extension == 'zip' or extension == 'tar':
return tarball.compress(path, extension, dir_path)
elif '.' in extension:
split_ext = extension.split('.')
if split_ext[0] != 'tar':
raise ValueError(
'Development error, only zip or tar archive supported, '
'%s not supported' % extension)
# deal with specific tar
mode = split_ext[1]
supported_mode = ['xz', 'gz', 'bz2']
if mode not in supported_mode:
raise ValueError(
'Development error, only %s supported, %s not supported' % (
supported_mode, mode))
files = tarball._ls(dir_path)
with tarfile.open(path, 'w:%s' % mode) as t:
for fpath, fname in files:
t.add(fpath, arcname=fname, recursive=False)
return path
def create_arborescence_archive(root_path, archive_name, filename, content,
up_to_size=None, extension='zip'):
"""Build an archive named archive_name in the root_path.
This archive contains one file named filename with the content content.
Args:
root_path (str): Location path of the archive to create
archive_name (str): Archive's name (without extension)
filename (str): Archive's content is only one filename
content (bytes): Content of the filename
up_to_size (int | None): Fill in the blanks size to oversize
or complete an archive's size
extension (str): Extension of the archive to write (default is zip)
Returns:
dict with the keys:
- dir: the directory of that archive
- path: full path to the archive
- sha1sum: archive's sha1sum
- length: archive's length
"""
os.makedirs(root_path, exist_ok=True)
archive_path_dir = tempfile.mkdtemp(dir=root_path)
dir_path = os.path.join(archive_path_dir, archive_name)
os.mkdir(dir_path)
filepath = os.path.join(dir_path, filename)
_length = len(content)
count = 0
batch_size = 128
with open(filepath, 'wb') as f:
f.write(content)
if up_to_size: # fill with blank content up to a given size
count += _length
while count < up_to_size:
f.write(b'0'*batch_size)
count += batch_size
_path = '%s.%s' % (dir_path, extension)
_path = _compress(_path, extension, dir_path)
return compute_info(_path)
def create_archive_with_archive(root_path, name, archive):
"""Create an archive holding another.
"""
invalid_archive_path = os.path.join(root_path, name)
with tarfile.open(invalid_archive_path, 'w:gz') as _archive:
_archive.add(archive['path'], arcname=archive['name'])
return compute_info(invalid_archive_path)
@pytest.mark.fs
class FileSystemCreationRoutine(TestCase):
"""Mixin intended for tests needed to tamper with archives.
"""
def setUp(self):
"""Define the test client and other test variables."""
super().setUp()
self.root_path = '/tmp/swh-deposit/test/build-zip/'
os.makedirs(self.root_path, exist_ok=True)
self.archive = create_arborescence_archive(
self.root_path, 'archive1', 'file1', b'some content in file')
self.atom_entry = b"""
Awesome Compiler
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
1785io25c695
2017-10-07T15:17:08Z
some awesome author
https://hal-test.archives-ouvertes.fr
"""
def tearDown(self):
super().tearDown()
shutil.rmtree(self.root_path)
def create_simple_binary_deposit(self, status_partial=True):
response = self.client.post(
reverse(COL_IRI, args=[self.collection.name]),
content_type='application/zip',
data=self.archive['data'],
CONTENT_LENGTH=self.archive['length'],
HTTP_MD5SUM=self.archive['md5sum'],
HTTP_SLUG='external-id',
HTTP_IN_PROGRESS=status_partial,
HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
self.archive['name'], ))
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
_status = response_content['deposit_status']
if status_partial:
expected_status = DEPOSIT_STATUS_PARTIAL
else:
expected_status = DEPOSIT_STATUS_VERIFIED
self.assertEqual(_status, expected_status)
deposit_id = int(response_content['deposit_id'])
return deposit_id
def create_complex_binary_deposit(self, status_partial=False):
deposit_id = self.create_simple_binary_deposit(
status_partial=True)
# Add a second archive to the deposit
# update its status to DEPOSIT_STATUS_VERIFIED
response = self.client.post(
reverse(EM_IRI, args=[self.collection.name, deposit_id]),
content_type='application/zip',
data=self.archive2['data'],
CONTENT_LENGTH=self.archive2['length'],
HTTP_MD5SUM=self.archive2['md5sum'],
HTTP_SLUG='external-id',
HTTP_IN_PROGRESS=status_partial,
HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip')
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = int(response_content['deposit_id'])
return deposit_id
def create_deposit_archive_with_archive(self, archive_extension):
# we create the holding archive to a given extension
archive = create_arborescence_archive(
self.root_path, 'archive1', 'file1', b'some content in file',
extension=archive_extension)
# now we create an archive holding the first created archive
invalid_archive = create_archive_with_archive(
self.root_path, 'invalid.tar.gz', archive)
# we deposit it
response = self.client.post(
reverse(COL_IRI, args=[self.collection.name]),
content_type='application/x-tar',
data=invalid_archive['data'],
CONTENT_LENGTH=invalid_archive['length'],
HTTP_MD5SUM=invalid_archive['md5sum'],
HTTP_SLUG='external-id',
HTTP_IN_PROGRESS=False,
HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
invalid_archive['name'], ))
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
_status = response_content['deposit_status']
self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED)
deposit_id = int(response_content['deposit_id'])
return deposit_id
def update_binary_deposit(self, deposit_id, status_partial=False):
# update existing deposit with atom entry metadata
response = self.client.post(
reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
content_type='application/atom+xml;type=entry',
data=self.codemeta_entry_data1,
HTTP_SLUG='external-id',
HTTP_IN_PROGRESS=status_partial)
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
_status = response_content['deposit_status']
if status_partial:
expected_status = DEPOSIT_STATUS_PARTIAL
else:
expected_status = DEPOSIT_STATUS_DEPOSITED
self.assertEqual(_status, expected_status)
deposit_id = int(response_content['deposit_id'])
return deposit_id
@pytest.mark.fs
class BasicTestCase(TestCase):
    """Mixin providing the basic test data setup (user, collection).

    """
    def setUp(self):
        """Create the minimal collection and client the tests rely on."""
        super().setUp()
        self.maxDiff = None  # expand diffs in test failure output

        name = 'hal'
        provider_url = 'https://hal-test.archives-ouvertes.fr/'
        domain = 'archives-ouvertes.fr/'

        # collection the test deposits will target
        collection = DepositCollection(name=name)
        collection.save()

        # client/user allowed to deposit into that collection
        user = DepositClient.objects.create_user(
            username=name,
            password=name,
            provider_url=provider_url,
            domain=domain)
        user.collections = [collection.id]
        user.last_name = name
        user.save()

        self.collection = collection
        self.user = user
        self.username = name
        self.userpass = name

    def tearDown(self):
        super().tearDown()
        # Clean up uploaded files in temporary directory (tests have
        # their own media root folder)
        if os.path.exists(MEDIA_ROOT):
            for entry in os.listdir(MEDIA_ROOT):
                shutil.rmtree(os.path.join(MEDIA_ROOT, entry))
class WithAuthTestCase(TestCase):
    """Mixin intended for testing the api with basic authentication.

    """
    def setUp(self):
        super().setUp()
        # Install HTTP basic-auth credentials on the test client.
        raw_credentials = '%s:%s' % (self.username, self.userpass)
        encoded = base64.b64encode(raw_credentials.encode('utf-8'))
        self.client.credentials(
            HTTP_AUTHORIZATION='Basic %s' % encoded.decode('utf-8'))

    def tearDown(self):
        super().tearDown()
        # Drop the credentials so they do not leak into other tests.
        self.client.credentials()
class CommonCreationRoutine(TestCase):
    """Mixin class to share deposit-creation routines between tests.

    cf:
        `class`:test_deposit_update.DepositReplaceExistingDataTest
        `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest
        `class`:test_deposit_update.DepositUpdateFailuresTest
        `class`:test_deposit_delete.DepositDeleteTest

    """
    def setUp(self):
        super().setUp()

        # Atom / codemeta payloads reused by the helper methods below.
        # NOTE(review): payload bodies reproduced verbatim from the
        # original fixtures.
        self.atom_entry_data0 = b"""
some-external-id
https://hal-test.archives-ouvertes.fr/some-external-id
some awesome author
"""

        self.atom_entry_data1 = b"""
another one
no one
+ 2017-10-07T15:17:08Z
"""

        self.atom_entry_data2 = b"""
Awesome Compiler
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
1785io25c695
2017-10-07T15:17:08Z
some awesome author
https://hal-test.archives-ouvertes.fr/id
"""

        self.codemeta_entry_data0 = b"""
Awesome Compiler
https://hal-test.archives-ouvertes.fr/1785io25c695
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
1785io25c695
2017-10-07T15:17:08Z
some awesome author
description
key-word 1
"""

        self.codemeta_entry_data1 = b"""
Composing a Web of Audio Applications
hal
hal-01243065
hal-01243065
https://hal-test.archives-ouvertes.fr/hal-01243065
test
DSP programming,Web
2017-05-03T16:08:47+02:00
this is the description
1
phpstorm
stable
php
python
C
GNU General Public License v3.0 only
CeCILL Free Software License Agreement v1.1
HAL
hal@ccsd.cnrs.fr
Morane Gruenpeter
"""

    def create_deposit_with_invalid_archive(self,
                                            external_id='some-external-id-1'):
        """Post a payload which is clearly not a valid archive.

        Args:
            external_id (str): slug to attach to the deposit

        Returns:
            The new deposit's identifier (int).

        """
        url = reverse(COL_IRI, args=[self.collection.name])

        data = b'some data which is clearly not a zip file'
        md5sum = hashlib.md5(data).hexdigest()

        # when
        response = self.client.post(
            url,
            content_type='application/zip',  # as zip
            data=data,
            # + headers
            CONTENT_LENGTH=len(data),
            # other headers needs HTTP_ prefix to be taken into account
            HTTP_SLUG=external_id,
            HTTP_CONTENT_MD5=md5sum,
            HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
            HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')

        response_content = parse_xml(BytesIO(response.content))
        deposit_id = int(response_content['deposit_id'])
        return deposit_id

    def create_deposit_with_status(
            self, status,
            external_id='some-external-id-1',
            swh_id=None,
            swh_id_context=None,
            swh_anchor_id=None,
            swh_anchor_id_context=None,
            status_detail=None):
        """Create a deposit then force it into the given status.

        We cannot create some form of deposit with a given status in
        test context ('rejected' for example), so we update the stored
        deposit in place to permit further tests.

        Returns:
            The deposit's identifier (int).

        """
        # create an invalid deposit which we will update further down the line
        deposit_id = self.create_deposit_with_invalid_archive(external_id)

        deposit = Deposit.objects.get(pk=deposit_id)
        if status == DEPOSIT_STATUS_REJECTED:
            # only rejected deposits carry a status detail
            deposit.status_detail = status_detail
        deposit.status = status
        if swh_id:
            deposit.swh_id = swh_id
        if swh_id_context:
            deposit.swh_id_context = swh_id_context
        if swh_anchor_id:
            deposit.swh_anchor_id = swh_anchor_id
        if swh_anchor_id_context:
            deposit.swh_anchor_id_context = swh_anchor_id_context
        deposit.save()
        return deposit_id

    def create_simple_deposit_partial(self, external_id='some-external-id'):
        """Create a simple deposit (1 request) in `partial` state and returns
        its new identifier.

        Returns:
            deposit id

        """
        response = self.client.post(
            reverse(COL_IRI, args=[self.collection.name]),
            content_type='application/atom+xml;type=entry',
            data=self.atom_entry_data0,
            HTTP_SLUG=external_id,
            HTTP_IN_PROGRESS='true')

        assert response.status_code == status.HTTP_201_CREATED
        response_content = parse_xml(BytesIO(response.content))
        deposit_id = int(response_content['deposit_id'])
        return deposit_id

    def create_deposit_partial_with_data_in_args(self, data):
        """Create a simple deposit (1 request) in `partial` state with the data
        or metadata as an argument and returns its new identifier.

        Args:
            data: atom entry

        Returns:
            deposit id

        """
        response = self.client.post(
            reverse(COL_IRI, args=[self.collection.name]),
            content_type='application/atom+xml;type=entry',
            data=data,
            HTTP_SLUG='external-id',
            HTTP_IN_PROGRESS='true')

        assert response.status_code == status.HTTP_201_CREATED
        response_content = parse_xml(BytesIO(response.content))
        deposit_id = int(response_content['deposit_id'])
        return deposit_id

    def _update_deposit_with_status(self, deposit_id, status_partial=False):
        """Add to a given deposit another archive and update its current
        status to `deposited` (by default).

        Returns:
            deposit id

        """
        # when
        response = self.client.post(
            reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
            content_type='application/atom+xml;type=entry',
            data=self.atom_entry_data1,
            HTTP_SLUG='external-id',
            HTTP_IN_PROGRESS=status_partial)

        # then
        assert response.status_code == status.HTTP_201_CREATED
        return deposit_id

    def create_deposit_ready(self, external_id='some-external-id'):
        """Create a complex deposit (2 requests) in status `deposited`.

        """
        deposit_id = self.create_simple_deposit_partial(
            external_id=external_id)
        deposit_id = self._update_deposit_with_status(deposit_id)
        return deposit_id

    def create_deposit_partial(self, external_id='some-external-id'):
        """Create a complex deposit (2 requests) in status `partial`.

        """
        deposit_id = self.create_simple_deposit_partial(
            external_id=external_id)
        deposit_id = self._update_deposit_with_status(
            deposit_id, status_partial=True)
        return deposit_id

    def add_metadata_to_deposit(self, deposit_id, status_partial=False):
        """Add metadata to deposit and check it was actually stored.

        """
        # when
        response = self.client.post(
            reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
            content_type='application/atom+xml;type=entry',
            data=self.codemeta_entry_data1,
            HTTP_SLUG='external-id',
            HTTP_IN_PROGRESS=status_partial)
        assert response.status_code == status.HTTP_201_CREATED

        # then
        deposit = Deposit.objects.get(pk=deposit_id)
        assert deposit is not None

        deposit_requests = DepositRequest.objects.filter(deposit=deposit)
        # Fix: `is not []` compares identity against a fresh literal and
        # is always true; check emptiness with len() instead.
        assert len(deposit_requests) != 0
        for dr in deposit_requests:
            if dr.type == 'metadata':
                # Fix: inspect the matching request `dr` (not always
                # element 0) and compare by equality, not identity.
                assert dr.metadata != {}
        return deposit_id
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
index 3e03db67..20e7099f 100644
--- a/swh/deposit/tests/loader/test_loader.py
+++ b/swh/deposit/tests/loader/test_loader.py
@@ -1,169 +1,171 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import unittest
import shutil
import pytest
from rest_framework.test import APITestCase
from swh.model import hashutil
from swh.deposit.models import Deposit
from swh.deposit.loader import loader
from swh.deposit.config import (
PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT
)
from django.urls import reverse
from swh.loader.core.tests import BaseLoaderStorageTest
from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG
from .. import TEST_LOADER_CONFIG
from ..common import (BasicTestCase, WithAuthTestCase,
CommonCreationRoutine,
FileSystemCreationRoutine)
class TestLoaderUtils(unittest.TestCase):
    def assertRevisionsOk(self, expected_revisions):  # noqa: N802
        """Check the loader's revisions match the expected revisions.

        Expects self.loader to be instantiated and ready to be
        inspected (meaning the loading took place).

        Args:
            expected_revisions (dict): Dict with key revision id,
                value the targeted directory id.

        """
        # The last revision being the one used later to start back from
        for revision in self.loader.state['revision']:
            revision_id = hashutil.hash_to_hex(revision['id'])
            directory_id = hashutil.hash_to_hex(revision['directory'])
            self.assertEqual(expected_revisions[revision_id],
                             directory_id)
@pytest.mark.fs
class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase,
BasicTestCase, CommonCreationRoutine,
FileSystemCreationRoutine, TestLoaderUtils,
BaseLoaderStorageTest):
def setUp(self):
super().setUp()
# create the extraction dir used by the loader
os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True)
- # 1. create a deposit with archive and metadata
- self.deposit_id = self.create_simple_binary_deposit()
- # 2. Sets a basic client which accesses the test data
+ # Sets a basic client which accesses the test data
loader_client = SWHDepositTestClient(self.client,
config=CLIENT_TEST_CONFIG)
- # 3. setup loader with that client
+ # Setup loader with that client
self.loader = loader.DepositLoader(client=loader_client)
self.storage = self.loader.storage
def tearDown(self):
super().tearDown()
shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir'])
def test_inject_deposit_ready(self):
"""Load a deposit which is ready
"""
- args = [self.collection.name, self.deposit_id]
+ # create a deposit with archive and metadata
+ deposit_id = self.create_simple_binary_deposit()
+ self.update_binary_deposit(deposit_id, status_partial=False)
+
+ args = [self.collection.name, deposit_id]
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
# when
res = self.loader.load(archive_url=archive_url,
deposit_meta_url=deposit_meta_url,
deposit_update_url=deposit_update_url)
# then
self.assertEqual(res['status'], 'eventful', res)
self.assertCountContents(1)
self.assertCountDirectories(1)
self.assertCountRevisions(1)
self.assertCountReleases(0)
self.assertCountSnapshots(1)
def test_inject_deposit_verify_metadata(self):
"""Load a deposit with metadata, test metadata integrity
"""
- self.deposit_metadata_id = self.add_metadata_to_deposit(
- self.deposit_id)
- args = [self.collection.name, self.deposit_metadata_id]
+ deposit_id = self.create_simple_binary_deposit()
+ self.add_metadata_to_deposit(deposit_id, status_partial=False)
+ args = [self.collection.name, deposit_id]
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
# when
self.loader.load(archive_url=archive_url,
deposit_meta_url=deposit_meta_url,
deposit_update_url=deposit_update_url)
# then
self.assertCountContents(1)
self.assertCountDirectories(1)
self.assertCountRevisions(1)
self.assertCountReleases(0)
self.assertCountSnapshots(1)
codemeta = 'codemeta:'
origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065'
expected_origin_metadata = {
'@xmlns': 'http://www.w3.org/2005/Atom',
'@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
'author': {
'email': 'hal@ccsd.cnrs.fr',
'name': 'HAL'
},
codemeta + 'url': origin_url,
codemeta + 'runtimePlatform': 'phpstorm',
codemeta + 'license': [
{
codemeta + 'name': 'GNU General Public License v3.0 only'
},
{
codemeta + 'name': 'CeCILL Free Software License Agreement v1.1' # noqa
}
],
codemeta + 'author': {
codemeta + 'name': 'Morane Gruenpeter'
},
codemeta + 'programmingLanguage': ['php', 'python', 'C'],
codemeta + 'applicationCategory': 'test',
codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00',
codemeta + 'version': '1',
'external_identifier': 'hal-01243065',
'title': 'Composing a Web of Audio Applications',
codemeta + 'description': 'this is the description',
'id': 'hal-01243065',
'client': 'hal',
codemeta + 'keywords': 'DSP programming,Web',
codemeta + 'developmentStatus': 'stable'
}
self.assertOriginMetadataContains('deposit', origin_url,
expected_origin_metadata)
- deposit = Deposit.objects.get(pk=self.deposit_id)
+ deposit = Deposit.objects.get(pk=deposit_id)
self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*')
self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % (
deposit.swh_id, origin_url
))
self.assertRegex(deposit.swh_anchor_id, r'^swh:1:rev:.*')
self.assertEqual(deposit.swh_anchor_id_context, '%s;origin=%s' % (
deposit.swh_anchor_id, origin_url
))