diff --git a/PKG-INFO b/PKG-INFO index 77466c87..1346caef 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.46 +Version: 0.0.47 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 77466c87..1346caef 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.46 +Version: 0.0.47 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh/deposit/loader/checker.py b/swh/deposit/loader/checker.py index b7677f52..80cd35c8 100644 --- a/swh/deposit/loader/checker.py +++ b/swh/deposit/loader/checker.py @@ -1,20 +1,27 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from ..client import PrivateApiDepositClient class DepositChecker(): """Deposit checker implementation. Trigger deposit's checks through the private api. """ def __init__(self, client=None): super().__init__() self.client = client if client else PrivateApiDepositClient() def check(self, deposit_check_url): - return self.client.check(deposit_check_url) + try: + self.client.check(deposit_check_url) + except Exception: + self.log.exception("Failure during check on '%s'" % ( + deposit_check_url, )) + return {'status': 'failed'} + else: + return {'status': 'eventful'} diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py index 855ee415..d8cb5bd9 100644 --- a/swh/deposit/loader/loader.py +++ b/swh/deposit/loader/loader.py @@ -1,129 +1,129 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import os import tempfile from swh.model import hashutil from swh.loader.tar import loader from swh.loader.core.loader import SWHLoader from ..client import PrivateApiDepositClient class DepositLoader(loader.TarLoader): """Deposit loader implementation. This is a subclass of the :class:TarLoader as the main goal of this class is to first retrieve the deposit's tarball contents as one and its associated metadata. Then provide said tarball to be loaded by the TarLoader. This will: - retrieves the deposit's archive locally - provide the archive to be loaded by the tar loader - clean up the temporary location used to retrieve the archive locally - update the deposit's status accordingly """ CONFIG_BASE_FILENAME = 'loader/deposit' ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'), } def __init__(self, client=None): super().__init__( logging_class='swh.deposit.loader.loader.DepositLoader') self.client = client if client else PrivateApiDepositClient() def load(self, *, archive_url, deposit_meta_url, deposit_update_url): - SWHLoader.load( + return SWHLoader.load( self, archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url): """Prepare the loading by first retrieving the deposit's raw archive content. """ self.deposit_update_url = deposit_update_url self.client.status_update(deposit_update_url, 'loading') temporary_directory = tempfile.TemporaryDirectory() self.temporary_directory = temporary_directory archive_path = os.path.join(temporary_directory.name, 'archive.zip') archive = self.client.archive_get( archive_url, archive_path, log=self.log) metadata = self.client.metadata_get( deposit_meta_url, log=self.log) origin = metadata['origin'] visit_date = datetime.datetime.now(tz=datetime.timezone.utc) revision = metadata['revision'] occurrence = metadata['occurrence'] self.origin_metadata = metadata['origin_metadata'] self.prepare_metadata() super().prepare(tar_path=archive, origin=origin, visit_date=visit_date, revision=revision, occurrences=[occurrence]) def store_metadata(self): """Storing the origin_metadata during the load processus. Provider_id and tool_id are resolved during the prepare() method. """ origin_id = self.origin_id visit_date = self.visit_date provider_id = self.origin_metadata['provider']['provider_id'] tool_id = self.origin_metadata['tool']['tool_id'] metadata = self.origin_metadata['metadata'] try: self.send_origin_metadata(origin_id, visit_date, provider_id, tool_id, metadata) except Exception: self.log.exception('Problem when storing origin_metadata') raise def post_load(self, success=True): """Updating the deposit's status according to its loading status. If not successful, we update its status to 'failed'. Otherwise, we update its status to 'done' and pass along its associated revision. """ try: if not success: self.client.status_update(self.deposit_update_url, status='failed') return # first retrieve the new revision [rev_id] = self.objects['revision'].keys() if rev_id: rev_id_hex = hashutil.hash_to_hex(rev_id) # then update the deposit's status to success with its # revision-id self.client.status_update(self.deposit_update_url, status='done', revision_id=rev_id_hex) except Exception: self.log.exception( 'Problem when trying to update the deposit\'s status') def cleanup(self): """Clean up temporary directory where we retrieved the tarball. """ super().cleanup() self.temporary_directory.cleanup() diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py index 57a49e43..770df259 100644 --- a/swh/deposit/loader/tasks.py +++ b/swh/deposit/loader/tasks.py @@ -1,50 +1,50 @@ -# Copyright (C) 2015-2017 The Software Heritage developers +# Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.task import Task from swh.deposit.loader import loader, checker class LoadDepositArchiveTsk(Task): """Deposit archive loading task described by the following steps: 1. Retrieve tarball from deposit's private api and store locally in a temporary directory 2. Trigger the loading 3. clean up the temporary directory 4. Update the deposit's status according to result using the deposit's private update status api """ task_queue = 'swh_loader_deposit' def run_task(self, *, archive_url, deposit_meta_url, deposit_update_url): """Import a deposit tarball into swh. Args: see :func:`DepositLoader.load`. """ _loader = loader.DepositLoader() _loader.log = self.log - _loader.load(archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) + return _loader.load(archive_url=archive_url, + deposit_meta_url=deposit_meta_url, + deposit_update_url=deposit_update_url) class ChecksDepositTsk(Task): """Deposit checks task. """ task_queue = 'swh_checker_deposit' def run_task(self, deposit_check_url): """Check a deposit's status Args: see :func:`DepositChecker.check`. """ _checker = checker.DepositChecker() _checker.log = self.log - _checker.check(deposit_check_url) + return _checker.check(deposit_check_url) diff --git a/swh/deposit/tests/loader/test_checker.py b/swh/deposit/tests/loader/test_checker.py index 6747b269..cb0d111a 100644 --- a/swh/deposit/tests/loader/test_checker.py +++ b/swh/deposit/tests/loader/test_checker.py @@ -1,72 +1,72 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from nose.tools import istest from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_VERIFIED from swh.deposit.config import DEPOSIT_STATUS_REJECTED from swh.deposit.loader.checker import DepositChecker from django.core.urlresolvers import reverse from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine class DepositCheckerScenarioTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() # 2. Sets a basic client which accesses the test data checker_client = SWHDepositTestClient(client=self.client, config=CLIENT_TEST_CONFIG) # 3. setup loader with no persistence and that client self.checker = DepositChecker(client=checker_client) @istest def check_deposit_ready(self): """Check a valid deposit deposited should result in ready state """ # 1. create a deposit with archive and metadata deposit_id = self.create_simple_binary_deposit() deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) # when - actual_status = self.checker.check(deposit_check_url=deposit_check_url) + actual_result = self.checker.check(deposit_check_url=deposit_check_url) # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) - self.assertEquals(actual_status, DEPOSIT_STATUS_VERIFIED) + self.assertEquals(actual_result, {'status': 'eventful'}) @istest def check_deposit_rejected(self): """Check an invalid deposit deposited should result in rejected """ # 1. create a deposit with archive and metadata deposit_id = self.create_invalid_deposit() args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) # when - actual_status = self.checker.check(deposit_check_url=deposit_check_url) + actual_result = self.checker.check(deposit_check_url=deposit_check_url) # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) - self.assertEquals(actual_status, DEPOSIT_STATUS_REJECTED) + self.assertEquals(actual_result, {'status': 'eventful'}) diff --git a/version.txt b/version.txt index 1e7be37c..ad1aa90e 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.46-0-g6620254 \ No newline at end of file +v0.0.47-0-g43dcea8 \ No newline at end of file