diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py deleted file mode 100644 --- a/swh/deposit/loader/loader.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) 2017-2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import os -import tempfile - -from swh.model import hashutil -from swh.loader.tar import loader -from swh.loader.core.loader import BufferedLoader - -from ..client import PrivateApiDepositClient - - -class DepositLoader(loader.LegacyLocalTarLoader): - """Deposit loader implementation. - - This is a subclass of the :class:TarLoader as the main goal of - this class is to first retrieve the deposit's tarball contents as - one and its associated metadata. Then provide said tarball to be - loaded by the TarLoader. - - This will: - - - retrieves the deposit's archive locally - - provide the archive to be loaded by the tar loader - - clean up the temporary location used to retrieve the archive locally - - update the deposit's status accordingly - - """ - CONFIG_BASE_FILENAME = 'loader/deposit' - - ADDITIONAL_CONFIG = { - 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'), - } - - visit_type = 'deposit' - - def __init__(self, client=None): - super().__init__( - logging_class='swh.deposit.loader.loader.DepositLoader') - self.deposit_client = client if client else PrivateApiDepositClient() - - def load(self, *, archive_url, deposit_meta_url, deposit_update_url): - return BufferedLoader.load( - self, - archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - def prepare_origin_visit(self, *, deposit_meta_url, **kwargs): - self.metadata = self.deposit_client.metadata_get( - deposit_meta_url) - self.origin = self.metadata['origin'] - self.visit_date = None - - def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url): - """Prepare the loading by first retrieving the deposit's raw archive - content. - - """ - self.deposit_update_url = deposit_update_url - self.deposit_client.status_update(deposit_update_url, 'loading') - - temporary_directory = tempfile.TemporaryDirectory() - self.temporary_directory = temporary_directory - archive_path = os.path.join(temporary_directory.name, 'archive.zip') - archive = self.deposit_client.archive_get( - archive_url, archive_path) - - metadata = self.metadata - revision = metadata['revision'] - branch_name = metadata['branch_name'] - self.origin_metadata = metadata['origin_metadata'] - self.prepare_metadata() - - super().prepare(tar_path=archive, - origin=self.origin, - revision=revision, - branch_name=branch_name) - - def store_metadata(self): - """Storing the origin_metadata during the load processus. - - Provider_id and tool_id are resolved during the prepare() method. - - """ - visit_date = self.visit_date - provider_id = self.origin_metadata['provider']['provider_id'] - tool_id = self.origin_metadata['tool']['tool_id'] - metadata = self.origin_metadata['metadata'] - try: - self.send_origin_metadata(visit_date, provider_id, - tool_id, metadata) - except Exception: - self.log.exception('Problem when storing origin_metadata') - raise - - def post_load(self, success=True): - """Updating the deposit's status according to its loading status. - - If not successful, we update its status to 'failed'. - Otherwise, we update its status to 'done' and pass along its - associated revision. - - """ - try: - if not success: - self.deposit_client.status_update(self.deposit_update_url, - status='failed') - return - - revisions = self.objects['revision'] - # Retrieve the revision - [rev_id] = revisions.keys() - rev = revisions[rev_id] - if rev_id: - rev_id = hashutil.hash_to_hex(rev_id) - - dir_id = rev['directory'] - if dir_id: - dir_id = hashutil.hash_to_hex(dir_id) - - # update the deposit's status to success with its - # revision-id and directory-id - self.deposit_client.status_update( - self.deposit_update_url, - status='done', - revision_id=rev_id, - directory_id=dir_id, - origin_url=self.origin['url']) - except Exception: - self.log.exception( - 'Problem when trying to update the deposit\'s status') - - def cleanup(self): - """Clean up temporary directory where we retrieved the tarball. - - """ - super().cleanup() - self.temporary_directory.cleanup() diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py --- a/swh/deposit/loader/tasks.py +++ b/swh/deposit/loader/tasks.py @@ -5,29 +5,9 @@ from celery import shared_task -from swh.deposit.loader.loader import DepositLoader from swh.deposit.loader.checker import DepositChecker -@shared_task(name=__name__ + '.LoadDepositArchiveTsk') -def load_deposit(archive_url, deposit_meta_url, deposit_update_url): - """Deposit archive loading task described by the following steps: - - 1. Retrieve tarball from deposit's private api and store - locally in a temporary directory - 2. Trigger the loading - 3. clean up the temporary directory - 4. Update the deposit's status according to result using the - deposit's private update status api - - """ - loader = DepositLoader() - return loader.load( - archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - @shared_task(name=__name__ + '.ChecksDepositTsk') def check_deposit(deposit_check_url): """Check a deposit's status diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py --- a/swh/deposit/tests/loader/conftest.py +++ b/swh/deposit/tests/loader/conftest.py @@ -14,7 +14,6 @@ from swh.scheduler.tests.conftest import * # noqa from swh.storage.tests.conftest import * # noqa from swh.deposit.loader.checker import DepositChecker -from swh.deposit.loader.loader import DepositLoader @pytest.fixture(scope='session') # type: ignore # expected redefinition @@ -52,11 +51,6 @@ return DepositChecker() -@pytest.fixture -def deposit_loader(swh_config): - return DepositLoader() - - @pytest.fixture def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with put method diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py deleted file mode 100644 --- a/swh/deposit/tests/loader/test_loader.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (C) 2017-2019 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from swh.deposit.config import ( - PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT -) -from django.urls import reverse -from swh.model.hashutil import hash_to_bytes - -from .common import get_stats, check_snapshot - - -def test_inject_deposit_ready( - swh_config, requests_mock_datadir, datadir, deposit_loader): - """Load a deposit which is ready - - """ - args = ['test', 999] - archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) - deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) - deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) - - # when - res = deposit_loader.load( - archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - # then - assert res['status'] == 'eventful' - stats = get_stats(deposit_loader.storage) - - assert { - 'content': 303, - 'skipped_content': 0, - 'directory': 12, - 'origin': 1, - 'origin_visit': 1, - 'person': 1, - 'release': 0, - 'revision': 1, - 'snapshot': 1, - } == stats - - origin_url = 'https://hal-test.archives-ouvertes.fr/some-external-id' - rev_id = 'b1bef04d90ef3ba645df4c4f945748c173a4e9a2' - dir_id = 'bed9acbf2a4502499f659e65a2ab77096bd46a1d' - - expected_revision = { - 'author': { - 'name': b'Software Heritage', - 'fullname': b'Software Heritage', - 'email': b'robot@softwareheritage.org'}, - 'committer': { - 'name': b'Software Heritage', - 'fullname': b'Software Heritage', - 'email': b'robot@softwareheritage.org'}, - 'committer_date': { - 'negative_utc': 'false', - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428}}, - 'date': { - 'negative_utc': 'false', - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428}}, - 'message': b'test: Deposit 999 in collection test', - 'metadata': { - '@xmlns': ['http://www.w3.org/2005/Atom'], - 'author': ['some awesome author', 'another one', 'no one'], - 'codemeta:dateCreated': '2017-10-07T15:17:08Z', - 'external_identifier': 'some-external-id', - 'url': origin_url, - 'original_artifact': [ - { - 'name': 'archive.zip', - 'archive_type': 'tar', - 'length': 725946, - 'blake2s256': '04fffd328441d216c92492ad72d37388d8c77889880b069151298786fd48d889', # noqa - 'sha256': '31e066137a962676e89f69d1b65382de95a7ef7d914b8cb956f41ea72e0f516b', # noqa - 'sha1': 'f7bebf6f9c62a2295e889f66e05ce9bfaed9ace3', - 'sha1_git': 'cae6b33cc33faafd2d6bd86c6b4273f9338c69c2' - } - ] - }, - 'synthetic': True, - 'type': 'tar', - 'parents': [], - 'directory': hash_to_bytes(dir_id), - 'id': hash_to_bytes(rev_id), - } - - rev = next(deposit_loader.storage.revision_get([hash_to_bytes(rev_id)])) - assert rev is not None - assert expected_revision == rev - - expected_snapshot = { - 'id': '823109c16f9948c6f88cc5dec8e278da1487f06d', - 'branches': { - 'master': { - 'target': rev_id, - 'target_type': 'revision' - } - } - } - - check_snapshot(expected_snapshot, deposit_loader.storage) diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py --- a/swh/deposit/tests/loader/test_tasks.py +++ b/swh/deposit/tests/loader/test_tasks.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,24 +6,6 @@ from unittest.mock import patch -@patch('swh.deposit.loader.loader.DepositLoader.load') -def deposit_load(loader, swh_config, swh_app, celery_session_worker): - loader.return_value = {'status': 'eventful'} - - res = swh_app.send_task( - 'swh.deposit.loader.tasks.LoadDepositArchiveTsk', - args=('archive_url', 'deposit_meta_url', 'deposit_update_url')) - assert res - res.wait() - assert res.successful() - - assert res.result == {'status': 'eventful'} - loader.assert_called_once_with( - archive_url='archive_url', - deposit_meta_url='deposit_meta_url', - deposit_update_url='deposit_update_url') - - @patch('swh.deposit.loader.checker.DepositChecker.check') def deposit_check(checker, swh_config, swh_app, celery_session_worker): checker.return_value = {'status': 'uneventful'}