Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/api/private/deposit_check.py
# Copyright (C) 2017-2019 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import re | import re | ||||
import tarfile | import tarfile | ||||
import zipfile | import zipfile | ||||
from itertools import chain | |||||
from shutil import get_unpack_formats | |||||
from rest_framework import status | from rest_framework import status | ||||
from swh.scheduler.utils import create_oneshot_task_dict | |||||
from . import DepositReadMixin, SWHPrivateAPIView | from . import DepositReadMixin, SWHPrivateAPIView | ||||
from ..common import SWHGetDepositAPI | from ..common import SWHGetDepositAPI | ||||
from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED | from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED | ||||
from ...config import ARCHIVE_TYPE | from ...config import ARCHIVE_TYPE | ||||
from ...models import Deposit | from ...models import Deposit | ||||
# Human-readable error details used by the deposit checks (e.g. returned
# alongside False by `_check_archive` when an archive is not supported).
MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing'
ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing'
MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable'  # noqa
MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)'  # noqa
MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported'
MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected'

# File name extensions (without the leading dot) recognized as archives by
# PATTERN_ARCHIVE_EXTENSION.
ARCHIVE_EXTENSIONS = [
    'zip', 'tar', 'tar.gz', 'xz', 'tar.xz', 'bz2',
    'tar.bz2', 'Z', 'tar.Z', 'tgz', '7z'
]

# Matches any name ending with a dot followed by one of ARCHIVE_EXTENSIONS.
# Each extension is escaped so that the '.' inside compound extensions such
# as 'tar.gz' only matches a literal dot and not an arbitrary character.
PATTERN_ARCHIVE_EXTENSION = re.compile(
    r'.*\.(%s)$' % '|'.join(re.escape(ext) for ext in ARCHIVE_EXTENSIONS))
def known_archive_format(filename):
    """Tell whether `filename` ends with an extension shutil can unpack.

    The comparison is case-sensitive.

    Args:
        filename (str): Name or path of the archive to check

    Returns:
        True if filename ends with one of the extensions registered in
        shutil.get_unpack_formats(), False otherwise.

    """
    # Each registered format is a (name, extensions, description) tuple;
    # flatten all the extension lists so that a single str.endswith call
    # can test them at once.
    extensions = tuple(chain.from_iterable(
        fmt[1] for fmt in get_unpack_formats()))
    return filename.endswith(extensions)
class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): | class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): | ||||
"""Dedicated class to read a deposit's raw archives content. | """Dedicated class to read a deposit's raw archives content. | ||||
Only GET is supported. | Only GET is supported. | ||||
""" | """ | ||||
def _check_deposit_archives(self, deposit): | def _check_deposit_archives(self, deposit): | ||||
"""Given a deposit, check each deposit request of type archive. | """Given a deposit, check each deposit request of type archive. | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | def _check_archive(self, archive_request): | ||||
archive_path (DepositRequest): Archive to check | archive_path (DepositRequest): Archive to check | ||||
Returns: | Returns: | ||||
(True, None) if archive is check compliant, (False, | (True, None) if archive is check compliant, (False, | ||||
<detail-error>) otherwise. | <detail-error>) otherwise. | ||||
""" | """ | ||||
archive_path = archive_request.archive.path | archive_path = archive_request.archive.path | ||||
if not known_archive_format(archive_path): | |||||
return False, MANDATORY_ARCHIVE_UNSUPPORTED | |||||
try: | try: | ||||
if zipfile.is_zipfile(archive_path): | if zipfile.is_zipfile(archive_path): | ||||
with zipfile.ZipFile(archive_path) as f: | with zipfile.ZipFile(archive_path) as f: | ||||
files = f.namelist() | files = f.namelist() | ||||
elif tarfile.is_tarfile(archive_path): | elif tarfile.is_tarfile(archive_path): | ||||
with tarfile.open(archive_path) as f: | with tarfile.open(archive_path) as f: | ||||
files = f.getnames() | files = f.getnames() | ||||
else: | else: | ||||
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | def process_get(self, req, collection_name, deposit_id): | ||||
'status': deposit.status, | 'status': deposit.status, | ||||
'details': deposit.status_detail, | 'details': deposit.status_detail, | ||||
} | } | ||||
else: | else: | ||||
deposit.status = DEPOSIT_STATUS_VERIFIED | deposit.status = DEPOSIT_STATUS_VERIFIED | ||||
response = { | response = { | ||||
'status': deposit.status, | 'status': deposit.status, | ||||
} | } | ||||
if not deposit.load_task_id and self.config['checks']: | |||||
url = deposit.origin_url | |||||
task = create_oneshot_task_dict( | |||||
'load-deposit', url=url, deposit_id=deposit.id) | |||||
load_task_id = self.scheduler.create_tasks([task])[0]['id'] | |||||
deposit.load_task_id = load_task_id | |||||
deposit.save() | deposit.save() | ||||
return status.HTTP_200_OK, json.dumps(response), 'application/json' | return status.HTTP_200_OK, json.dumps(response), 'application/json' |