Changeset View
Standalone View
swh/deposit/api/private/deposit_check.py
Show First 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | def _check_archive(self, archive): | ||||
zf = zipfile.ZipFile(archive.path) | zf = zipfile.ZipFile(archive.path) | ||||
zf.infolist() | zf.infolist() | ||||
except Exception as e: | except Exception as e: | ||||
return False | return False | ||||
else: | else: | ||||
return True | return True | ||||
def _check_deposit_metadata(self, deposit): | def _check_deposit_metadata(self, deposit): | ||||
"""Given a deposit, check each deposit request of type metadata. | """Given a deposit, check each deposit request of type metadata, | ||||
ardumont: We don't want to expose the implementation detail of how we do the check in the documentation… | |||||
Done Inline ActionsWell, the docstring no longer matches... :) ardumont: Well, the docstring no longer matches... :) | |||||
by aggregating all metadata requests one bundle. | |||||
Args: | Args: | ||||
The deposit to check metadata for. | The deposit to check metadata for. | ||||
Returns: | Returns: | ||||
True if the deposit's associated metadata are ok, False otherwise. | True if the deposit's associated metadata are ok, False otherwise. | ||||
""" | """ | ||||
metadata = {} | metadata = {} | ||||
for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): | for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): | ||||
metadata.update(dr.metadata) | metadata.update(dr.metadata) | ||||
return self._check_metadata(metadata) | return self._check_metadata(metadata) | ||||
def _check_metadata(self, metadata): | def _check_metadata(self, metadata): | ||||
"""Check to execute on all metadata. | """Check to execute on all metadata and keeps metadata_url for url validation. | ||||
Done Inline ActionsThat would be the right location for the previous docstring. ardumont: That would be the right location for the previous docstring. | |||||
Args: | Args: | ||||
metadata (): Metadata to actually check | metadata (): Metadata to actually check | ||||
Done Inline ActionsI forgot (dict) instead of (), can you please add it? ardumont: I forgot `(dict)` instead of `()`, can you please add it? | |||||
Returns: | Returns: | ||||
True if metadata is ok, False otherwise. | True if metadata is ok, False otherwise. | ||||
""" | """ | ||||
required_fields = (('url',), | required_fields = (('url',), | ||||
('external_identifier',), | ('external_identifier',), | ||||
('name', 'title'), | ('name', 'title'), | ||||
('author',)) | ('author',)) | ||||
result = all(any(name in field | result = all(any(name in field | ||||
for field in metadata | for field in metadata | ||||
for name in possible_names) | for name in possible_names) | ||||
for possible_names in required_fields) | for possible_names in required_fields) | ||||
urls = [] | |||||
for field in metadata: | |||||
if 'url' in field: | |||||
urls.append(metadata[field]) | |||||
Done Inline ActionsPlease, see comments line 143 below ardumont: Please, see comments line 143 below | |||||
self.metadata_url = urls | |||||
return result | return result | ||||
def _check_url(self, client_url, metadata_urls): | |||||
validatation = any(client_url in url | |||||
for url in metadata_urls) | |||||
Done Inline ActionsSimply returning the result here would be fine. Also a docstring would be good. As a note, there is a typo in the variable's name ;) ardumont: Simply returning the result here would be fine.
Also a docstring would be good.
As a note… | |||||
return validatation | |||||
def process_get(self, req, collection_name, deposit_id): | def process_get(self, req, collection_name, deposit_id): | ||||
"""Build a unique tarball from the multiple received and stream that | """Build a unique tarball from the multiple received and stream that | ||||
content to the client. | content to the client. | ||||
Args: | Args: | ||||
req (Request): | req (Request): | ||||
collection_name (str): Collection owning the deposit | collection_name (str): Collection owning the deposit | ||||
deposit_id (id): Deposit concerned by the reading | deposit_id (id): Deposit concerned by the reading | ||||
Returns: | Returns: | ||||
Tuple status, stream of content, content-type | Tuple status, stream of content, content-type | ||||
""" | """ | ||||
deposit = Deposit.objects.get(pk=deposit_id) | deposit = Deposit.objects.get(pk=deposit_id) | ||||
client_url = deposit.client.url | |||||
Done Inline ActionsI don't like setting variable outside of the __init__ construct, especially when we can avoid it :) If we want to manipulate the metadata multiple times (as introduced here), we should have a method for it. Then the multiple check methods will manipulate those metadata. The intents are clearer that way. So here, that would give:
Then no need for that intermediary variable. ardumont: I don't like setting variable outside of the `__init__` construct, especially when we can avoid… | |||||
self.metadata_url = None # created in _check_metadata | |||||
problems = [] | problems = [] | ||||
# will check each deposit's associated request (both of type | # will check each deposit's associated request (both of type | ||||
# archive and metadata) for errors | # archive and metadata) for errors | ||||
archives_status = self._check_deposit_archives(deposit) | archives_status = self._check_deposit_archives(deposit) | ||||
if not archives_status: | if not archives_status: | ||||
problems.append('archive(s)') | problems.append('archive(s)') | ||||
metadata_status = self._check_deposit_metadata(deposit) | metadata_status = self._check_deposit_metadata(deposit) | ||||
if not metadata_status: | if not metadata_status: | ||||
problems.append('metadata') | problems.append('metadata') | ||||
deposit_status = archives_status and metadata_status | url_status = self._check_url(client_url, self.metadata_url) | ||||
if not url_status: | |||||
problems.append('url') | |||||
deposit_status = archives_status and metadata_status and url_status | |||||
# if any problems arose, the deposit is rejected | # if any problems arose, the deposit is rejected | ||||
if not deposit_status: | if not deposit_status: | ||||
deposit.status = DEPOSIT_STATUS_REJECTED | deposit.status = DEPOSIT_STATUS_REJECTED | ||||
else: | else: | ||||
deposit.status = DEPOSIT_STATUS_READY | deposit.status = DEPOSIT_STATUS_READY | ||||
deposit.save() | deposit.save() | ||||
return (status.HTTP_200_OK, | return (status.HTTP_200_OK, | ||||
json.dumps({ | json.dumps({ | ||||
'status': deposit.status, | 'status': deposit.status, | ||||
'details': 'Some %s failed the checks.' % ( | 'details': 'Some %s failed the checks.' % ( | ||||
' and '.join(problems), ), | ' and '.join(problems), ), | ||||
}), | }), | ||||
'application/json') | 'application/json') |
We don't want to expose the implementation detail of how we do the check in the documentation ;)
We want to expose the intent.
A better docstring than my previous attempt would be: