diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -15,6 +15,7 @@ from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date +from swh.deposit import utils from . import DepositReadMixin from ...config import SWH_PERSON, ARCHIVE_TYPE @@ -130,13 +131,6 @@ self.provider = self.config['provider'] self.tool = self.config['tool'] - def _retrieve_url(self, deposit, metadata): - client_domain = deposit.client.domain - for field in metadata: - if 'url' in field: - if client_domain in metadata[field]: - return metadata[field] - def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. @@ -178,14 +172,13 @@ Dictionary of data representing the deposit to inject in swh. """ - data = {} metadata = self._metadata_get(deposit) - # create origin_url from metadata only after deposit_check validates it - origin_url = self._retrieve_url(deposit, metadata) # Read information metadata - data['origin'] = { - 'type': 'deposit', - 'url': origin_url + data = { + 'origin': { + 'type': 'deposit', + 'url': utils.origin_url_from(deposit), + } } # revision diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -19,6 +19,7 @@ from django.urls import reverse from swh.loader.core.tests import BaseLoaderStorageTest +from swh.deposit import utils from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from .. 
import TEST_LOADER_CONFIG @@ -123,7 +124,9 @@ self.assertCountSnapshots(1) codemeta = 'codemeta:' - origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065' + deposit = Deposit.objects.get(pk=deposit_id) + origin_url = utils.origin_url_from(deposit) + expected_origin_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', @@ -131,7 +134,7 @@ 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, - codemeta + 'url': origin_url, + codemeta + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # same as xml # noqa codemeta + 'runtimePlatform': 'phpstorm', codemeta + 'license': [ { @@ -159,8 +162,6 @@ self.assertOriginMetadataContains('deposit', origin_url, expected_origin_metadata) - deposit = Deposit.objects.get(pk=deposit_id) - self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( deposit.swh_id, origin_url diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -8,6 +8,24 @@ from unittest.mock import patch from swh.deposit import utils +from swh.deposit.models import Deposit, DepositClient + + +def test_origin_url_from(): + + for provider_url, external_id in ( + ('http://somewhere.org', 'uuid'), + ('http://overthejungle.org', 'diuu'), + ): + deposit = Deposit( + client=DepositClient(provider_url=provider_url), + external_id=external_id + ) + + actual_origin_url = utils.origin_url_from(deposit) + + assert actual_origin_url == '%s/%s' % ( + provider_url.rstrip('/'), external_id) class UtilsTestCase(unittest.TestCase): diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -10,6 +10,21 @@ from swh.model.identifiers import normalize_timestamp +def origin_url_from(deposit): + """Given a deposit instance, return the associated origin url + + Args: + deposit (Deposit): The deposit from 
which the origin url is derived + + Returns: + str: The associated origin url + + """ + base_url = deposit.client.provider_url + external_id = deposit.external_id + return '%s/%s' % (base_url.rstrip('/'), external_id) + + def merge(*dicts): """Given an iterator of dicts, merge them losing no information.