Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123408
D380.id1203.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
D380.id1203.diff
View Options
diff --git a/docs/endpoints/status.rst b/docs/endpoints/status.rst
--- a/docs/endpoints/status.rst
+++ b/docs/endpoints/status.rst
@@ -3,15 +3,14 @@
.. http:get:: /1/<collection-name>/<deposit-id>/
- Display deposit's status in regards to loading.
-
+ Returns deposit's status.
The different statuses:
- **partial**: multipart deposit is still ongoing
- - **deposited**: deposit completed
+ - **deposited**: deposit completed, ready for checks
- **rejected**: deposit failed the checks
- - **verified**: content and metadata verified
+ - **verified**: content and metadata verified, ready for loading
- **loading**: loading in-progress
- **done**: loading completed successfully
- **failed**: the deposit loading has failed
@@ -24,6 +23,51 @@
:statuscode 404: access to an unknown deposit
+Rejected deposit
+~~~~~~~~~~~~~~~~
+
+It so happens that deposit could be rejected. In that case, the
+`deposit_status_detail` entry will explain failed checks.
+
+Many reasons are possibles, here are some:
+
+- Deposit without software archive (main goal of the deposit is to
+ deposit software source code)
+
+- Deposit with malformed software archive (i.e archive within archive)
+
+- Deposit with invalid software archive (corrupted archive, although,
+ this one should happen during upload and not during checks)
+
+- Deposit with unsupported archive format
+
+- Deposit with missing metadata
+
Sample response
~~~~~~~~~~~~~~~
+
+ Successful deposit:
+
+ .. code:: xml
+
+ <entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/"
+ xmlns:dcterms="http://purl.org/dc/terms/">
+ <deposit_id>150</deposit_id>
+ <deposit_status>done</deposit_status>
+ <deposit_status_detail>The deposit has been successfully loaded into the Software Heritage archive</deposit_status_detail>
+ <deposit_swh_id>swh:1:rev:c648730299c2a4f4df3c1fe6e527ef3681f9527e</deposit_swh_id>
+ </entry>
+
+ Rejected deposit:
+
+ .. code:: xml
+
+ <entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/"
+ xmlns:dcterms="http://purl.org/dc/terms/">
+ <deposit_id>148</deposit_id>
+ <deposit_status>rejected</deposit_status>
+ <deposit_status_detail>- At least one url field must be compatible with the client's domain name (codemeta:url)</deposit_status_detail>
+ </entry>
diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py
--- a/swh/deposit/api/deposit_status.py
+++ b/swh/deposit/api/deposit_status.py
@@ -26,10 +26,10 @@
'summary': <summary-string>,
'fields': <impacted-fields-list>,
}],
- 'archive': {
+ 'archive': [{
'summary': <summary-string>,
- 'fields': [],
- }
+ 'fields': <impacted-fields-list>,
+ }]
}
@@ -45,19 +45,23 @@
return None
msg = []
- if 'metadata' in status_detail:
- for data in status_detail['metadata']:
- fields = ', '.join(data['fields'])
- msg.append('- %s (%s)\n' % (data['summary'], fields))
-
- for key in ['url', 'archive']:
- if key in status_detail:
- _detail = status_detail[key]
- fields = _detail.get('fields')
- suffix_msg = ''
- if fields:
- suffix_msg = ' (%s)' % ', '.join(fields)
- msg.append('- %s%s\n' % (_detail['summary'], suffix_msg))
+ for key in ['metadata', 'archive']:
+ _detail = status_detail.get(key)
+ if _detail:
+ for data in _detail:
+ suffix_msg = ''
+ fields = data.get('fields')
+ if fields:
+ suffix_msg = ' (%s)' % ', '.join(fields)
+ msg.append('- %s%s\n' % (data['summary'], suffix_msg))
+
+ _detail = status_detail.get('url')
+ if _detail:
+ fields = _detail.get('fields')
+ suffix_msg = ''
+ if fields:
+ suffix_msg = ' (%s)' % ', '.join(fields)
+ msg.append('- %s%s\n' % (_detail['summary'], suffix_msg))
if not msg:
return None
diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -4,7 +4,9 @@
# See top-level LICENSE file for more information
import json
-import patoolib
+import re
+import tarfile
+import zipfile
from rest_framework import status
@@ -16,10 +18,11 @@
MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing'
ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing'
-
-MANDATORY_ARCHIVE_UNREADABLE = 'Deposit was rejected because at least one of its associated archives was not readable' # noqa
-MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected'
INCOMPATIBLE_URL_FIELDS = "At least one url field must be compatible with the client's domain name" # noqa
+MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable' # noqa
+MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)' # noqa
+MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported'
+MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected'
class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin):
@@ -43,42 +46,63 @@
deposit, request_type=ARCHIVE_TYPE))
if len(requests) == 0: # no associated archive is refused
return False, {
- 'archive': {
+ 'archive': [{
'summary': MANDATORY_ARCHIVE_MISSING,
- }
+ }]
}
- rejected_dr_ids = []
- for dr in requests:
- _path = dr.archive.path
- check = self._check_archive(_path)
+ errors = []
+ for archive_request in requests:
+ check, error_message = self._check_archive(archive_request)
if not check:
- rejected_dr_ids.append(dr.id)
+ errors.append({
+ 'summary': error_message,
+ 'fields': [archive_request.id]
+ })
- if rejected_dr_ids:
- return False, {
- 'archive': {
- 'summary': MANDATORY_ARCHIVE_UNREADABLE,
- 'fields': rejected_dr_ids,
- }}
- return True, None
+ if not errors:
+ return True, None
+ return False, {
+ 'archive': errors
+ }
+
+ def _check_archive(self, archive_request):
+ """Check that a deposit associated archive is ok:
+ - readable
+ - supported archive format
+ - content of the archive is not a single archive
- def _check_archive(self, archive_path):
- """Check that a given archive is actually ok for reading.
+ If any of those checks are not ok, return the corresponding
+ failing check.
Args:
- archive_path (str): Archive to check
+ archive_path (DepositRequest): Archive to check
Returns:
- True if archive is successfully read, False otherwise.
+ (True, None) if archive is check compliant, (False,
+ <detail-error>) otherwise.
"""
+ archive_path = archive_request.archive.path
try:
- patoolib.test_archive(archive_path, verbosity=-1)
+ if zipfile.is_zipfile(archive_path):
+ with zipfile.ZipFile(archive_path) as f:
+ files = f.namelist()
+ elif tarfile.is_tarfile(archive_path):
+ with tarfile.open(archive_path) as f:
+ files = f.getnames()
+ else:
+ return False, MANDATORY_ARCHIVE_UNSUPPORTED
except Exception:
- return False
- else:
- return True
+ return False, MANDATORY_ARCHIVE_UNREADABLE
+ if len(files) > 1:
+ return True, None
+ element = files[0]
+ pattern = re.compile(
+ r'.*\.(zip|tar|tar.gz|.xz|tar.xz|Z|.tar.Z|bz2|tar.bz2)$')
+ if pattern.match(element): # invalid archive in archive
+ return False, MANDATORY_ARCHIVE_INVALID
+ return True, None
def _check_metadata(self, metadata):
"""Check to execute on all metadata for mandatory field presence.
diff --git a/swh/deposit/models.py b/swh/deposit/models.py
--- a/swh/deposit/models.py
+++ b/swh/deposit/models.py
@@ -13,9 +13,11 @@
from django.db import models
from django.utils.timezone import now
-from .config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED
-from .config import DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS
-from .config import DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_REJECTED
+from .config import (
+ DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL,
+ DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE,
+ DEPOSIT_STATUS_REJECTED
+)
class Dbversion(models.Model):
@@ -43,7 +45,7 @@
('expired', 'expired'),
(DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED),
(DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED),
- ('rejected', 'rejected'),
+ (DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_REJECTED),
('loading', 'loading'),
(DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS),
(DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE),
@@ -60,7 +62,7 @@
'(tarball ok, metadata, etc...)',
DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and '
'ready for loading',
- 'rejected': 'Deposit failed the checks',
+ DEPOSIT_STATUS_REJECTED: 'Deposit failed the checks',
'loading': "Loading is ongoing on swh's side",
DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully '
'loaded into the Software Heritage archive',
diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py
--- a/swh/deposit/tests/api/test_deposit_check.py
+++ b/swh/deposit/tests/api/test_deposit_check.py
@@ -17,9 +17,10 @@
DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED
)
from swh.deposit.api.private.deposit_check import (
- SWHChecksDeposit,
+ SWHChecksDeposit, MANDATORY_ARCHIVE_INVALID,
MANDATORY_FIELDS_MISSING, INCOMPATIBLE_URL_FIELDS,
- MANDATORY_ARCHIVE_UNREADABLE, ALTERNATE_FIELDS_MISSING
+ MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING,
+ MANDATORY_ARCHIVE_MISSING
)
from swh.deposit.models import Deposit
@@ -61,8 +62,71 @@
self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED)
@istest
- def deposit_ko(self):
- """Invalid deposit should fail the checks (-> status rejected)
+ def deposit_invalid_tarball(self):
+ """Deposit with tarball (of 1 tarball) should fail the checks: rejected
+
+ """
+ deposit_id = self.create_deposit_archive_with_archive()
+
+ deposit = Deposit.objects.get(pk=deposit_id)
+ self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status)
+
+ url = reverse(PRIVATE_CHECK_DEPOSIT,
+ args=[self.collection.name, deposit.id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ data = json.loads(response.content.decode('utf-8'))
+ self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED)
+ details = data['details']
+ # archive checks failure
+ self.assertEqual(len(details['archive']), 1)
+ self.assertEqual(details['archive'][0]['summary'],
+ MANDATORY_ARCHIVE_INVALID)
+ # metadata check failure
+ self.assertEqual(len(details['metadata']), 2)
+ mandatory = details['metadata'][0]
+ self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING)
+ self.assertEqual(set(mandatory['fields']),
+ set(['url', 'external_identifier', 'author']))
+ alternate = details['metadata'][1]
+ self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING)
+ self.assertEqual(alternate['fields'], ['name or title'])
+ # url check failure
+ self.assertEqual(details['url']['summary'], INCOMPATIBLE_URL_FIELDS)
+
+ deposit = Deposit.objects.get(pk=deposit.id)
+ self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED)
+
+ @istest
+ def deposit_ko_missing_tarball(self):
+ """Deposit without archive should fail the checks: rejected
+
+ """
+ deposit_id = self.create_deposit_ready() # no archive, only atom
+ deposit = Deposit.objects.get(pk=deposit_id)
+ self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status)
+
+ url = reverse(PRIVATE_CHECK_DEPOSIT,
+ args=[self.collection.name, deposit.id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+ data = json.loads(response.content.decode('utf-8'))
+ self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED)
+ details = data['details']
+ # archive checks failure
+ self.assertEqual(len(details['archive']), 1)
+ self.assertEqual(details['archive'][0]['summary'],
+ MANDATORY_ARCHIVE_MISSING)
+ deposit = Deposit.objects.get(pk=deposit.id)
+ self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED)
+
+ @istest
+ def deposit_ko_unsupported_tarball(self):
+ """Deposit with an unsupported tarball should fail the checks: rejected
"""
deposit_id = self.create_deposit_with_invalid_archive()
@@ -80,9 +144,9 @@
self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED)
details = data['details']
# archive checks failure
- self.assertEqual(len(details['archive']['fields']), 1)
- self.assertEqual(details['archive']['summary'],
- MANDATORY_ARCHIVE_UNREADABLE)
+ self.assertEqual(len(details['archive']), 1)
+ self.assertEqual(details['archive'][0]['summary'],
+ MANDATORY_ARCHIVE_UNSUPPORTED)
# metadata check failure
self.assertEqual(len(details['metadata']), 2)
mandatory = details['metadata'][0]
diff --git a/swh/deposit/tests/api/test_deposit_status.py b/swh/deposit/tests/api/test_deposit_status.py
--- a/swh/deposit/tests/api/test_deposit_status.py
+++ b/swh/deposit/tests/api/test_deposit_status.py
@@ -143,16 +143,16 @@
'fields': ['name or title', 'url or badurl']
}
],
- 'archive': {
+ 'archive': [{
'summary': 'Unreadable archive',
- 'fields': ['1', '2'],
- },
+ 'fields': ['1'],
+ }],
}
expected_status_detail = '''- Mandatory fields missing (url, title)
- Alternate fields missing (name or title, url or badurl)
+- Unreadable archive (1)
- At least one url field must be compatible with the client's domain name. The following url fields failed the check (blahurl, testurl)
-- Unreadable archive (1, 2)
''' # noqa
actual_status_detail = convert_status_detail(status_detail)
@@ -172,9 +172,21 @@
'fields': ['name'],
},
],
+ 'archive': [
+ {
+ 'summary': 'Invalid archive',
+ 'fields': ['2'],
+ },
+ {
+ 'summary': 'Unsupported archive',
+ 'fields': ['1'],
+ }
+ ],
}
expected_status_detail = '''- Mandatory fields missing (name)
+- Invalid archive (2)
+- Unsupported archive (1)
- At least one compatible url field. Failed (testurl)
'''
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -7,6 +7,7 @@
import hashlib
import os
import shutil
+import tarfile
import tempfile
from django.core.urlresolvers import reverse
@@ -28,6 +29,32 @@
from swh.core import tarball
+def compute_info(archive_path):
+ """Given a path, compute information on path.
+
+ """
+ with open(archive_path, 'rb') as f:
+ length = 0
+ sha1sum = hashlib.sha1()
+ md5sum = hashlib.md5()
+ data = b''
+ for chunk in f:
+ sha1sum.update(chunk)
+ md5sum.update(chunk)
+ length += len(chunk)
+ data += chunk
+
+ return {
+ 'dir': os.path.dirname(archive_path),
+ 'name': os.path.basename(archive_path),
+ 'path': archive_path,
+ 'length': length,
+ 'sha1sum': sha1sum.hexdigest(),
+ 'md5sum': md5sum.hexdigest(),
+ 'data': data
+ }
+
+
def create_arborescence_zip(root_path, archive_name, filename, content,
up_to_size=None):
"""Build an archive named archive_name in the root_path.
@@ -61,27 +88,17 @@
zip_path = dir_path + '.zip'
zip_path = tarball.compress(zip_path, 'zip', dir_path)
+ return compute_info(zip_path)
- with open(zip_path, 'rb') as f:
- length = 0
- sha1sum = hashlib.sha1()
- md5sum = hashlib.md5()
- data = b''
- for chunk in f:
- sha1sum.update(chunk)
- md5sum.update(chunk)
- length += len(chunk)
- data += chunk
- return {
- 'dir': archive_path_dir,
- 'name': archive_name,
- 'data': data,
- 'path': zip_path,
- 'sha1sum': sha1sum.hexdigest(),
- 'md5sum': md5sum.hexdigest(),
- 'length': length,
- }
+def create_archive_with_archive(root_path, name, archive):
+ """Create an archive holding another.
+
+ """
+ invalid_archive_path = os.path.join(root_path, name)
+ with tarfile.open(invalid_archive_path, 'w:gz') as _archive:
+ _archive.add(archive['path'], arcname=archive['name'])
+ return compute_info(invalid_archive_path)
@attr('fs')
@@ -158,6 +175,29 @@
deposit_id = int(response_content['deposit_id'])
return deposit_id
+ def create_deposit_archive_with_archive(self):
+ invalid_archive = create_archive_with_archive(
+ self.root_path, 'invalid.tar.gz', self.archive)
+
+ response = self.client.post(
+ reverse(COL_IRI, args=[self.collection.name]),
+ content_type='application/x-tar',
+ data=invalid_archive['data'],
+ CONTENT_LENGTH=invalid_archive['length'],
+ HTTP_MD5SUM=invalid_archive['md5sum'],
+ HTTP_SLUG='external-id',
+ HTTP_IN_PROGRESS=False,
+ HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
+ invalid_archive['name'], ))
+
+ # then
+ self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+ response_content = parse_xml(BytesIO(response.content))
+ _status = response_content['deposit_status']
+ self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED)
+ deposit_id = int(response_content['deposit_id'])
+ return deposit_id
+
def update_binary_deposit(self, deposit_id, status_partial=False):
# update existing deposit with atom entry metadata
response = self.client.post(
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 8:01 AM (13 h, 58 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228226
Attached To
D380: Reject deposit containing a single archive
Event Timeline
Log In to Comment