Page MenuHomeSoftware Heritage

D2402.id8458.diff
No OneTemporary

D2402.id8458.diff

diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py
deleted file mode 100644
--- a/swh/deposit/loader/loader.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import os
-import tempfile
-
-from swh.model import hashutil
-from swh.loader.tar import loader
-from swh.loader.core.loader import BufferedLoader
-
-from ..client import PrivateApiDepositClient
-
-
-class DepositLoader(loader.LegacyLocalTarLoader):
- """Deposit loader implementation.
-
- This is a subclass of the :class:TarLoader as the main goal of
- this class is to first retrieve the deposit's tarball contents as
- one and its associated metadata. Then provide said tarball to be
- loaded by the TarLoader.
-
- This will:
-
- - retrieves the deposit's archive locally
- - provide the archive to be loaded by the tar loader
- - clean up the temporary location used to retrieve the archive locally
- - update the deposit's status accordingly
-
- """
- CONFIG_BASE_FILENAME = 'loader/deposit'
-
- ADDITIONAL_CONFIG = {
- 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'),
- }
-
- visit_type = 'deposit'
-
- def __init__(self, client=None):
- super().__init__(
- logging_class='swh.deposit.loader.loader.DepositLoader')
- self.deposit_client = client if client else PrivateApiDepositClient()
-
- def load(self, *, archive_url, deposit_meta_url, deposit_update_url):
- return BufferedLoader.load(
- self,
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
- def prepare_origin_visit(self, *, deposit_meta_url, **kwargs):
- self.metadata = self.deposit_client.metadata_get(
- deposit_meta_url)
- self.origin = self.metadata['origin']
- self.visit_date = None
-
- def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url):
- """Prepare the loading by first retrieving the deposit's raw archive
- content.
-
- """
- self.deposit_update_url = deposit_update_url
- self.deposit_client.status_update(deposit_update_url, 'loading')
-
- temporary_directory = tempfile.TemporaryDirectory()
- self.temporary_directory = temporary_directory
- archive_path = os.path.join(temporary_directory.name, 'archive.zip')
- archive = self.deposit_client.archive_get(
- archive_url, archive_path)
-
- metadata = self.metadata
- revision = metadata['revision']
- branch_name = metadata['branch_name']
- self.origin_metadata = metadata['origin_metadata']
- self.prepare_metadata()
-
- super().prepare(tar_path=archive,
- origin=self.origin,
- revision=revision,
- branch_name=branch_name)
-
- def store_metadata(self):
- """Storing the origin_metadata during the load processus.
-
- Provider_id and tool_id are resolved during the prepare() method.
-
- """
- visit_date = self.visit_date
- provider_id = self.origin_metadata['provider']['provider_id']
- tool_id = self.origin_metadata['tool']['tool_id']
- metadata = self.origin_metadata['metadata']
- try:
- self.send_origin_metadata(visit_date, provider_id,
- tool_id, metadata)
- except Exception:
- self.log.exception('Problem when storing origin_metadata')
- raise
-
- def post_load(self, success=True):
- """Updating the deposit's status according to its loading status.
-
- If not successful, we update its status to 'failed'.
- Otherwise, we update its status to 'done' and pass along its
- associated revision.
-
- """
- try:
- if not success:
- self.deposit_client.status_update(self.deposit_update_url,
- status='failed')
- return
-
- revisions = self.objects['revision']
- # Retrieve the revision
- [rev_id] = revisions.keys()
- rev = revisions[rev_id]
- if rev_id:
- rev_id = hashutil.hash_to_hex(rev_id)
-
- dir_id = rev['directory']
- if dir_id:
- dir_id = hashutil.hash_to_hex(dir_id)
-
- # update the deposit's status to success with its
- # revision-id and directory-id
- self.deposit_client.status_update(
- self.deposit_update_url,
- status='done',
- revision_id=rev_id,
- directory_id=dir_id,
- origin_url=self.origin['url'])
- except Exception:
- self.log.exception(
- 'Problem when trying to update the deposit\'s status')
-
- def cleanup(self):
- """Clean up temporary directory where we retrieved the tarball.
-
- """
- super().cleanup()
- self.temporary_directory.cleanup()
diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py
--- a/swh/deposit/loader/tasks.py
+++ b/swh/deposit/loader/tasks.py
@@ -5,29 +5,9 @@
from celery import shared_task
-from swh.deposit.loader.loader import DepositLoader
from swh.deposit.loader.checker import DepositChecker
-@shared_task(name=__name__ + '.LoadDepositArchiveTsk')
-def load_deposit(archive_url, deposit_meta_url, deposit_update_url):
- """Deposit archive loading task described by the following steps:
-
- 1. Retrieve tarball from deposit's private api and store
- locally in a temporary directory
- 2. Trigger the loading
- 3. clean up the temporary directory
- 4. Update the deposit's status according to result using the
- deposit's private update status api
-
- """
- loader = DepositLoader()
- return loader.load(
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
-
@shared_task(name=__name__ + '.ChecksDepositTsk')
def check_deposit(deposit_check_url):
"""Check a deposit's status
diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py
--- a/swh/deposit/signals.py
+++ b/swh/deposit/signals.py
@@ -94,25 +94,10 @@
elif (instance.status == DEPOSIT_STATUS_VERIFIED and
not instance.load_task_id):
- version = default_config.config.get('loader-version', 1)
- # schedule deposit loading
- if int(version) == 1: # default version
- from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
- from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
- from swh.deposit.config import PRIVATE_PUT_DEPOSIT
- archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
- meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
- update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
-
- task = create_oneshot_task_dict('load-deposit',
- archive_url=archive_url,
- deposit_meta_url=meta_url,
- deposit_update_url=update_url)
- else: # new version
- url = utils.origin_url_from(instance)
- task = create_oneshot_task_dict(
- 'load-deposit',
- url=url, deposit_id=instance.id)
+ url = utils.origin_url_from(instance)
+ task = create_oneshot_task_dict(
+ 'load-deposit',
+ url=url, deposit_id=instance.id)
load_task_id = schedule_task(default_config.scheduler, task)
instance.load_task_id = load_task_id
diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py
--- a/swh/deposit/tests/loader/conftest.py
+++ b/swh/deposit/tests/loader/conftest.py
@@ -14,7 +14,6 @@
from swh.scheduler.tests.conftest import * # noqa
from swh.storage.tests.conftest import * # noqa
from swh.deposit.loader.checker import DepositChecker
-from swh.deposit.loader.loader import DepositLoader
@pytest.fixture(scope='session') # type: ignore # expected redefinition
@@ -52,11 +51,6 @@
return DepositChecker()
-@pytest.fixture
-def deposit_loader(swh_config):
- return DepositLoader()
-
-
@pytest.fixture
def requests_mock_datadir(datadir, requests_mock_datadir):
"""Override default behavior to deal with put method
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
deleted file mode 100644
--- a/swh/deposit/tests/loader/test_loader.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-from swh.deposit.config import (
- PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT
-)
-from django.urls import reverse
-from swh.model.hashutil import hash_to_bytes
-
-from .common import get_stats, check_snapshot
-
-
-def test_inject_deposit_ready(
- swh_config, requests_mock_datadir, datadir, deposit_loader):
- """Load a deposit which is ready
-
- """
- args = ['test', 999]
- archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
- deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
- deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
-
- # when
- res = deposit_loader.load(
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
- # then
- assert res['status'] == 'eventful'
- stats = get_stats(deposit_loader.storage)
-
- assert {
- 'content': 303,
- 'skipped_content': 0,
- 'directory': 12,
- 'origin': 1,
- 'origin_visit': 1,
- 'person': 1,
- 'release': 0,
- 'revision': 1,
- 'snapshot': 1,
- } == stats
-
- origin_url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
- rev_id = 'b1bef04d90ef3ba645df4c4f945748c173a4e9a2'
- dir_id = 'bed9acbf2a4502499f659e65a2ab77096bd46a1d'
-
- expected_revision = {
- 'author': {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'},
- 'committer': {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'},
- 'committer_date': {
- 'negative_utc': 'false',
- 'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1507389428}},
- 'date': {
- 'negative_utc': 'false',
- 'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1507389428}},
- 'message': b'test: Deposit 999 in collection test',
- 'metadata': {
- '@xmlns': ['http://www.w3.org/2005/Atom'],
- 'author': ['some awesome author', 'another one', 'no one'],
- 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
- 'external_identifier': 'some-external-id',
- 'url': origin_url,
- 'original_artifact': [
- {
- 'name': 'archive.zip',
- 'archive_type': 'tar',
- 'length': 725946,
- 'blake2s256': '04fffd328441d216c92492ad72d37388d8c77889880b069151298786fd48d889', # noqa
- 'sha256': '31e066137a962676e89f69d1b65382de95a7ef7d914b8cb956f41ea72e0f516b', # noqa
- 'sha1': 'f7bebf6f9c62a2295e889f66e05ce9bfaed9ace3',
- 'sha1_git': 'cae6b33cc33faafd2d6bd86c6b4273f9338c69c2'
- }
- ]
- },
- 'synthetic': True,
- 'type': 'tar',
- 'parents': [],
- 'directory': hash_to_bytes(dir_id),
- 'id': hash_to_bytes(rev_id),
- }
-
- rev = next(deposit_loader.storage.revision_get([hash_to_bytes(rev_id)]))
- assert rev is not None
- assert expected_revision == rev
-
- expected_snapshot = {
- 'id': '823109c16f9948c6f88cc5dec8e278da1487f06d',
- 'branches': {
- 'master': {
- 'target': rev_id,
- 'target_type': 'revision'
- }
- }
- }
-
- check_snapshot(expected_snapshot, deposit_loader.storage)
diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py
--- a/swh/deposit/tests/loader/test_tasks.py
+++ b/swh/deposit/tests/loader/test_tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,24 +6,6 @@
from unittest.mock import patch
-@patch('swh.deposit.loader.loader.DepositLoader.load')
-def deposit_load(loader, swh_config, swh_app, celery_session_worker):
- loader.return_value = {'status': 'eventful'}
-
- res = swh_app.send_task(
- 'swh.deposit.loader.tasks.LoadDepositArchiveTsk',
- args=('archive_url', 'deposit_meta_url', 'deposit_update_url'))
- assert res
- res.wait()
- assert res.successful()
-
- assert res.result == {'status': 'eventful'}
- loader.assert_called_once_with(
- archive_url='archive_url',
- deposit_meta_url='deposit_meta_url',
- deposit_update_url='deposit_update_url')
-
-
@patch('swh.deposit.loader.checker.DepositChecker.check')
def deposit_check(checker, swh_config, swh_app, celery_session_worker):
checker.return_value = {'status': 'uneventful'}

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 1:08 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229399

Event Timeline