diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -0,0 +1,51 @@ +# Copyright (C) 2017-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from ...config import METADATA_TYPE +from ...models import DepositRequest, Deposit + + +class DepositReadMixin: + """Deposit Read mixin + + """ + + def _deposit_requests(self, deposit, request_type): + """Given a deposit, yields its associated deposit_request + + Args: + deposit (Deposit or int): Deposit (or its pk) to list requests for + request_type (str): Archive or metadata type + + Yields: + deposit requests of type request_type for the deposit, sorted by id + + """ + if isinstance(deposit, int): + deposit = Deposit.objects.get(pk=deposit) + + deposit_requests = DepositRequest.objects.filter( + type=self.deposit_request_types[request_type], + deposit=deposit).order_by('id') + + for deposit_request in deposit_requests: + yield deposit_request + + def _metadata_get(self, deposit): + """Given a deposit, aggregate all metadata requests. + + Args: + deposit (Deposit or int): The deposit (or its pk) to extract + metadata from. + + Returns: + metadata dict from the deposit. + + """ + metadata = {} + for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): + metadata.update(dr.metadata) + return metadata diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -8,12 +8,11 @@ from rest_framework import status - +from . 
import DepositReadMixin from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED -from ...config import ARCHIVE_TYPE, METADATA_TYPE -from ...models import Deposit, DepositRequest - +from ...config import ARCHIVE_TYPE +from ...models import Deposit MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' @@ -23,30 +22,12 @@ INCOMPATIBLE_URL_FIELDS = "At least one url field must be compatible with the client's domain name" # noqa -class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): +class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ - def _deposit_requests(self, deposit, request_type): - """Given a deposit, yields its associated deposit_request - - Args: - deposit (Deposit): Deposit to list requests for - request_type (str): Archive or metadata type - - Yields: - deposit requests of type request_type associated to the deposit - - """ - deposit_requests = DepositRequest.objects.filter( - type=self.deposit_request_types[request_type], - deposit=deposit).order_by('id') - - for deposit_request in deposit_requests: - yield deposit_request - def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. @@ -99,22 +80,6 @@ else: return True - def _metadata_get(self, deposit): - """Given a deposit, aggregate all metadata requests. - - Args: - deposit (Deposit): The deposit instance to extract - metadata from. - - Returns: - metadata dict from the deposit. - - """ - metadata = {} - for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): - metadata.update(dr.metadata) - return metadata - def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. 
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -15,9 +15,10 @@ from swh.core import tarball from swh.model import identifiers -from ...config import SWH_PERSON +from . import DepositReadMixin +from ...config import SWH_PERSON, ARCHIVE_TYPE from ..common import SWHGetDepositAPI, SWHPrivateAPIView -from ...models import Deposit, DepositRequest +from ...models import Deposit @contextmanager @@ -64,7 +65,8 @@ yield archive_paths[0] -class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView): +class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView, + DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. @@ -87,11 +89,8 @@ path to deposited archives """ - deposit = Deposit.objects.get(pk=deposit_id) - deposit_requests = DepositRequest.objects.filter( - deposit=deposit, - type=self.deposit_request_types['archive']).order_by('id') - + deposit_requests = self._deposit_requests( + deposit_id, request_type=ARCHIVE_TYPE) for deposit_request in deposit_requests: yield deposit_request.archive.path @@ -116,10 +115,11 @@ content_type='application/octet-stream') -class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView): +class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView, + DepositReadMixin): """Class in charge of aggregating metadata on a deposit. - """ + """ ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the @@ -141,16 +141,6 @@ self.provider = self.config['provider'] self.tool = self.config['tool'] - def _aggregate_metadata(self, deposit, metadata_requests): - """Retrieve and aggregates metadata information. 
- - """ - metadata = {} - for req in metadata_requests: - metadata.update(req.metadata) - - return metadata - def _retrieve_url(self, deposit, metadata): client_domain = deposit.client.domain for field in metadata: @@ -158,22 +148,19 @@ if client_domain in metadata[field]: return metadata[field] - def aggregate(self, deposit, requests): - """Aggregate multiple data on deposit into one unified data dictionary. + def metadata_read(self, deposit): + """Read and aggregate multiple data on deposit into one unified data + dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. - requests ([DepositRequest]): List of associated requests which - need aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ data = {} - - # Retrieve tarballs/metadata information - metadata = self._aggregate_metadata(deposit, requests) + metadata = self._metadata_get(deposit) # create origin_url from metadata only after deposit_check validates it origin_url = self._retrieve_url(deposit, metadata) # Read information metadata @@ -226,10 +213,7 @@ def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) - requests = DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata']) - - data = self.aggregate(deposit, requests) + data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -52,6 +52,7 @@ 'origin_metadata': { 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier':'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -79,6 +80,7 @@ 'date': None, 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some 
awesome author', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -136,6 +138,7 @@ 'origin_metadata': { 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -164,6 +167,7 @@ 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { '@xmlns': 'http://www.w3.org/2005/Atom', + 'author': 'some awesome author', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -268,8 +268,7 @@ self.atom_entry_data1 = b""" - anotherthing - https://hal-test.archives-ouvertes.fr/anotherthing + some awesome author """