Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/api/private/deposit_check.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import re | import re | ||||
import tarfile | |||||
import zipfile | |||||
from rest_framework import status | from rest_framework import status | ||||
from tarfile import TarFile, is_tarfile | |||||
from zipfile import ZipFile, is_zipfile | |||||
from . import DepositReadMixin | from . import DepositReadMixin | ||||
from ..common import SWHGetDepositAPI, SWHPrivateAPIView | from ..common import SWHGetDepositAPI, SWHPrivateAPIView | ||||
from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED | from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED | ||||
from ...config import ARCHIVE_TYPE | from ...config import ARCHIVE_TYPE | ||||
from ...models import Deposit | from ...models import Deposit | ||||
# Human-readable reasons returned alongside a failed deposit check.
MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing'
ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing'
MANDATORY_ARCHIVE_UNREADABLE = (
    'Deposit was rejected because at least one of its associated '
    'archives was not readable'
)
MANDATORY_ARCHIVE_INVALID = (
    'Mandatory archive is invalid (e.g contains an archive)'
)
MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported'
MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected'
INCOMPATIBLE_URL_FIELDS = (
    "At least one url field must be compatible with the client's "
    "domain name"
)
moranegg: I like this new approach | |||||
class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): | class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): | ||||
"""Dedicated class to read a deposit's raw archives content. | """Dedicated class to read a deposit's raw archives content. | ||||
Only GET is supported. | Only GET is supported. | ||||
""" | """ | ||||
def _check_deposit_archives(self, deposit): | def _check_deposit_archives(self, deposit): | ||||
Show All 25 Lines | def _check_deposit_archives(self, deposit): | ||||
'fields': archive_request.id | 'fields': archive_request.id | ||||
}) | }) | ||||
if not errors: | if not errors: | ||||
return True, None | return True, None | ||||
return False, { | return False, { | ||||
'archive': errors | 'archive': errors | ||||
} | } | ||||
def _check_archive(self, archive_request):
    """Check that a given archive is actually ok:

    - supported: the file is recognized as a zip or a tar archive
    - readable: its list of members can be read
    - valid: it is not empty and its first level is not made of a
      single nested archive

    Args:
        archive_request (DepositRequest): Archive to check

    Returns:
        (True, None) if the archive is compliant with the checks,
        (False, <error message>) otherwise.

    """
    archive_path = archive_request.archive.path

    # Only the listing of the archive members is guarded: an error
    # raised here means the file could not be read as an archive.
    try:
        if zipfile.is_zipfile(archive_path):
            with zipfile.ZipFile(archive_path) as f:
                files = f.namelist()
        elif tarfile.is_tarfile(archive_path):
            # tarfile.open handles compressed tarballs transparently,
            # which a direct TarFile(path) instantiation does not.
            with tarfile.open(archive_path) as f:
                files = f.getnames()
        else:
            return False, MANDATORY_ARCHIVE_UNSUPPORTED
    except Exception:
        return False, MANDATORY_ARCHIVE_UNREADABLE

    if not files:
        # An empty archive is readable but holds nothing to check;
        # reject it explicitly instead of failing on files[0] below.
        return False, MANDATORY_ARCHIVE_INVALID

    if len(files) > 1:
        return True, None

    element = files[0]
    # Dots are escaped so that only real extensions are matched
    # (e.g. 'foo.xz' is an archive, 'a.bxz' is not).
    pattern = re.compile(
        r'.*\.(zip|tar|tar\.gz|xz|tar\.xz|Z|tar\.Z|bz2|tar\.bz2)$')
    if pattern.match(element):  # invalid archive in archive
        return False, MANDATORY_ARCHIVE_INVALID
    return True, None
def _check_metadata(self, metadata): | def _check_metadata(self, metadata): | ||||
"""Check to execute on all metadata for mandatory field presence. | """Check to execute on all metadata for mandatory field presence. | ||||
Args: | Args: | ||||
metadata (dict): Metadata dictionary to check for mandatory fields | metadata (dict): Metadata dictionary to check for mandatory fields | ||||
▲ Show 20 Lines • Show All 126 Lines • Show Last 20 Lines |
I like this new approach