diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -78,8 +78,8 @@ else: return True - def _check_deposit_metadata(self, deposit): - """Given a deposit, check each deposit request of type metadata. + def _metadata_get(self, deposit): + """Given a deposit, aggregate all metadata requests. Args: The deposit to check metadata for. @@ -91,14 +91,13 @@ metadata = {} for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): metadata.update(dr.metadata) - - return self._check_metadata(metadata) + return metadata def _check_metadata(self, metadata): - """Check to execute on all metadata. + """Check to execute on all metadata for mandatory field presence. Args: - metadata (): Metadata to actually check + metadata (dict): Metadata to actually check Returns: True if metadata is ok, False otherwise. @@ -115,6 +114,24 @@ for possible_names in required_fields) return result + def _check_url(self, client_url, metadata): + """Check compatibility between client_url and url field in metadata + + Args: + client_url (str): url associated with the deposit's client + metadata (dict): Metadata where to find url + Returns: + True if url is ok, False otherwise. + + """ + metadata_urls = [] + for field in metadata: + if 'url' in field: + metadata_urls.append(metadata[field]) + + return any(client_url in url + for url in metadata_urls) + def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. @@ -129,6 +146,8 @@ """ deposit = Deposit.objects.get(pk=deposit_id) + client_url = deposit.client.url + metadata = self._metadata_get(deposit) problems = [] # will check each deposit's associated request (both of type # archive and metadata) for errors @@ -136,11 +155,15 @@ if not archives_status: problems.append('archive(s)') - metadata_status = self._check_deposit_metadata(deposit) + metadata_status = self._check_metadata(metadata) if not metadata_status: problems.append('metadata') - deposit_status = archives_status and metadata_status + url_status = self._check_url(client_url, metadata) + if not url_status: + problems.append('url') + + deposit_status = archives_status and metadata_status and url_status # if any problems arose, the deposit is rejected if not deposit_status: diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -151,6 +151,13 @@ return metadata + def _retrieve_url(self, deposit, metadata): + client_url = deposit.client.url + for field in metadata: + if 'url' in field: + if client_url in metadata[field]: + return metadata[field] + def aggregate(self, deposit, requests): """Aggregate multiple data on deposit into one unified data dictionary. @@ -167,12 +174,12 @@ # Retrieve tarballs/metadata information metadata = self._aggregate_metadata(deposit, requests) - + # create origin_url from metadata only after deposit_check validates it + origin_url = self._retrieve_url(deposit, metadata) # Read information metadata data['origin'] = { 'type': 'deposit', - 'url': os.path.join(deposit.client.url.rstrip('/'), - deposit.external_id), + 'url': origin_url } # revision diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -72,7 +72,8 @@ data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) self.assertEqual(data['details'], - 'Some archive(s) and metadata failed the checks.') + 'Some archive(s) and metadata and url ' + + 'failed the checks.') deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -45,18 +45,22 @@ expected_meta = { 'origin': { - 'url': 'https://hal.test.fr/some-external-id', + 'url': 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '{http://www.w3.org/2005/Atom}external_identifier': + 'some-external-id', + '{http://www.w3.org/2005/Atom}url': + 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': '', 'provider_type': 'deposit_client', - 'provider_url': 'https://hal.test.fr/', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { @@ -76,6 +80,9 @@ 'date': None, 'metadata': { '{http://www.w3.org/2005/Atom}external_identifier': + 'some-external-id', + '{http://www.w3.org/2005/Atom}url': + 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'type': 'tar' @@ -125,18 +132,22 @@ expected_meta = { 'origin': { - 'url': 'https://hal.test.fr/some-external-id', + 'url': 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '{http://www.w3.org/2005/Atom}external_identifier': - 'some-external-id' + 'some-external-id', + '{http://www.w3.org/2005/Atom}url': + 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id' }, 'provider': { 'provider_name': '', 'provider_type': 'deposit_client', - 'provider_url': 'https://hal.test.fr/', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { @@ -157,7 +168,10 @@ 'message': ': Deposit %s in collection hal' % deposit_id, 'metadata': { '{http://www.w3.org/2005/Atom}external_identifier': - 'some-external-id' + 'some-external-id', + '{http://www.w3.org/2005/Atom}url': + 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id' }, 'parents': [swh_id] }, diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -101,7 +101,7 @@ 1785io25c695 2017-10-07T15:17:08Z some awesome author - http://test.test.fr + https://hal-test.archives-ouvertes.fr """ def tearDown(self): @@ -187,7 +187,7 @@ deposit_request_types[deposit_request_type] = drt _name = 'hal' - _url = 'https://hal.test.fr/' + _url = 'https://hal-test.archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() @@ -247,11 +247,14 @@ self.atom_entry_data0 = b""" some-external-id + https://hal-test.archives-ouvertes.fr/some-external-id """ self.atom_entry_data1 = b""" anotherthing + https://hal-test.archives-ouvertes.fr/anotherthing + """ self.atom_entry_data2 = b""" @@ -261,14 +264,14 @@ 1785io25c695 2017-10-07T15:17:08Z some awesome author - http://test.test.fr + https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler - http://test.test.fr + https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z