diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -124,8 +124,6 @@ """ required_fields = { - 'url': False, - 'external_identifier': False, 'author': False, } alternate_fields = { diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -14,6 +14,7 @@ from swh.core import tarball from swh.model import identifiers +from swh.deposit import utils from . import DepositReadMixin from ...config import SWH_PERSON, ARCHIVE_TYPE @@ -129,13 +130,6 @@ self.provider = self.config['provider'] self.tool = self.config['tool'] - def _retrieve_url(self, deposit, metadata): - client_domain = deposit.client.domain - for field in metadata: - if 'url' in field: - if client_domain in metadata[field]: - return metadata[field] - def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. @@ -147,14 +141,13 @@ Dictionary of data representing the deposit to inject in swh. 
""" - data = {} metadata = self._metadata_get(deposit) - # create origin_url from metadata only after deposit_check validates it - origin_url = self._retrieve_url(deposit, metadata) # Read information metadata - data['origin'] = { - 'type': 'deposit', - 'url': origin_url + data = { + 'origin': { + 'type': 'deposit', + 'url': utils.origin_url_from(deposit), + } } # revision diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -137,7 +137,7 @@ mandatory = details['metadata'][0] self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING) self.assertEqual(set(mandatory['fields']), - set(['url', 'external_identifier', 'author'])) + set(['author'])) alternate = details['metadata'][1] self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING) self.assertEqual(alternate['fields'], ['name or title']) diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -19,6 +19,7 @@ from django.urls import reverse from swh.loader.core.tests import BaseLoaderStorageTest +from swh.deposit import utils from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from .. 
import TEST_LOADER_CONFIG @@ -121,7 +122,9 @@ self.assertCountSnapshots(1) codemeta = 'codemeta:' - origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065' + deposit = Deposit.objects.get(pk=self.deposit_id) + origin_url = utils.origin_url_from(deposit) + expected_origin_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', @@ -129,7 +132,7 @@ 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, - codemeta + 'url': origin_url, + codemeta + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # same as xml # noqa codemeta + 'runtimePlatform': 'phpstorm', codemeta + 'license': [ { @@ -157,8 +160,6 @@ self.assertOriginMetadataContains('deposit', origin_url, expected_origin_metadata) - deposit = Deposit.objects.get(pk=self.deposit_id) - self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( deposit.swh_id, origin_url diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -7,6 +7,24 @@ from swh.deposit import utils +from swh.deposit.models import Deposit, DepositClient + + +def test_origin_url_from(): + + for provider_url, external_id in ( + ('http://somewhere.org', 'uuid'), + ('http://overthejungle.org', 'diuu'), + ): + deposit = Deposit( + client=DepositClient(provider_url=provider_url), + external_id=external_id + ) + + actual_origin_url = utils.origin_url_from(deposit) + + assert actual_origin_url == '%s/%s' % ( + provider_url.rstrip('/'), external_id) class UtilsTestCase(unittest.TestCase): diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -6,6 +6,21 @@ from types import GeneratorType +def origin_url_from(deposit): + """Given a deposit instance, return the associated origin url + + Args: + deposit (Deposit): The deposit from which the origin url is derived + + Returns: + The 
associated origin url + + """ + base_url = deposit.client.provider_url + external_id = deposit.external_id + return '%s/%s' % (base_url.rstrip('/'), external_id) + + def merge(*dicts): """Given an iterator of dicts, merge them losing no information.