Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9342961
D2402.id8458.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Subscribers
None
D2402.id8458.diff
View Options
diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py
deleted file mode 100644
--- a/swh/deposit/loader/loader.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import os
-import tempfile
-
-from swh.model import hashutil
-from swh.loader.tar import loader
-from swh.loader.core.loader import BufferedLoader
-
-from ..client import PrivateApiDepositClient
-
-
-class DepositLoader(loader.LegacyLocalTarLoader):
- """Deposit loader implementation.
-
- This is a subclass of the :class:`TarLoader` as the main goal of
- this class is to first retrieve the deposit's tarball contents as
- one and its associated metadata. Then provide said tarball to be
- loaded by the TarLoader.
-
- This will:
-
- - retrieve the deposit's archive locally
- - provide the archive to be loaded by the tar loader
- - clean up the temporary location used to retrieve the archive locally
- - update the deposit's status accordingly
-
- """
- CONFIG_BASE_FILENAME = 'loader/deposit'
-
- ADDITIONAL_CONFIG = {
- 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'),
- }
-
- visit_type = 'deposit'
-
- def __init__(self, client=None):
- super().__init__(
- logging_class='swh.deposit.loader.loader.DepositLoader')
- self.deposit_client = client if client else PrivateApiDepositClient()
-
- def load(self, *, archive_url, deposit_meta_url, deposit_update_url):
- return BufferedLoader.load(
- self,
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
- def prepare_origin_visit(self, *, deposit_meta_url, **kwargs):
- self.metadata = self.deposit_client.metadata_get(
- deposit_meta_url)
- self.origin = self.metadata['origin']
- self.visit_date = None
-
- def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url):
- """Prepare the loading by first retrieving the deposit's raw archive
- content.
-
- """
- self.deposit_update_url = deposit_update_url
- self.deposit_client.status_update(deposit_update_url, 'loading')
-
- temporary_directory = tempfile.TemporaryDirectory()
- self.temporary_directory = temporary_directory
- archive_path = os.path.join(temporary_directory.name, 'archive.zip')
- archive = self.deposit_client.archive_get(
- archive_url, archive_path)
-
- metadata = self.metadata
- revision = metadata['revision']
- branch_name = metadata['branch_name']
- self.origin_metadata = metadata['origin_metadata']
- self.prepare_metadata()
-
- super().prepare(tar_path=archive,
- origin=self.origin,
- revision=revision,
- branch_name=branch_name)
-
- def store_metadata(self):
- """Storing the origin_metadata during the loading process.
-
- Provider_id and tool_id are resolved during the prepare() method.
-
- """
- visit_date = self.visit_date
- provider_id = self.origin_metadata['provider']['provider_id']
- tool_id = self.origin_metadata['tool']['tool_id']
- metadata = self.origin_metadata['metadata']
- try:
- self.send_origin_metadata(visit_date, provider_id,
- tool_id, metadata)
- except Exception:
- self.log.exception('Problem when storing origin_metadata')
- raise
-
- def post_load(self, success=True):
- """Updating the deposit's status according to its loading status.
-
- If not successful, we update its status to 'failed'.
- Otherwise, we update its status to 'done' and pass along its
- associated revision.
-
- """
- try:
- if not success:
- self.deposit_client.status_update(self.deposit_update_url,
- status='failed')
- return
-
- revisions = self.objects['revision']
- # Retrieve the revision
- [rev_id] = revisions.keys()
- rev = revisions[rev_id]
- if rev_id:
- rev_id = hashutil.hash_to_hex(rev_id)
-
- dir_id = rev['directory']
- if dir_id:
- dir_id = hashutil.hash_to_hex(dir_id)
-
- # update the deposit's status to success with its
- # revision-id and directory-id
- self.deposit_client.status_update(
- self.deposit_update_url,
- status='done',
- revision_id=rev_id,
- directory_id=dir_id,
- origin_url=self.origin['url'])
- except Exception:
- self.log.exception(
- 'Problem when trying to update the deposit\'s status')
-
- def cleanup(self):
- """Clean up temporary directory where we retrieved the tarball.
-
- """
- super().cleanup()
- self.temporary_directory.cleanup()
diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py
--- a/swh/deposit/loader/tasks.py
+++ b/swh/deposit/loader/tasks.py
@@ -5,29 +5,9 @@
from celery import shared_task
-from swh.deposit.loader.loader import DepositLoader
from swh.deposit.loader.checker import DepositChecker
-@shared_task(name=__name__ + '.LoadDepositArchiveTsk')
-def load_deposit(archive_url, deposit_meta_url, deposit_update_url):
- """Deposit archive loading task described by the following steps:
-
- 1. Retrieve tarball from deposit's private api and store
- locally in a temporary directory
- 2. Trigger the loading
- 3. clean up the temporary directory
- 4. Update the deposit's status according to result using the
- deposit's private update status api
-
- """
- loader = DepositLoader()
- return loader.load(
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
-
@shared_task(name=__name__ + '.ChecksDepositTsk')
def check_deposit(deposit_check_url):
"""Check a deposit's status
diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py
--- a/swh/deposit/signals.py
+++ b/swh/deposit/signals.py
@@ -94,25 +94,10 @@
elif (instance.status == DEPOSIT_STATUS_VERIFIED and
not instance.load_task_id):
- version = default_config.config.get('loader-version', 1)
- # schedule deposit loading
- if int(version) == 1: # default version
- from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
- from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
- from swh.deposit.config import PRIVATE_PUT_DEPOSIT
- archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
- meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
- update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
-
- task = create_oneshot_task_dict('load-deposit',
- archive_url=archive_url,
- deposit_meta_url=meta_url,
- deposit_update_url=update_url)
- else: # new version
- url = utils.origin_url_from(instance)
- task = create_oneshot_task_dict(
- 'load-deposit',
- url=url, deposit_id=instance.id)
+ url = utils.origin_url_from(instance)
+ task = create_oneshot_task_dict(
+ 'load-deposit',
+ url=url, deposit_id=instance.id)
load_task_id = schedule_task(default_config.scheduler, task)
instance.load_task_id = load_task_id
diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py
--- a/swh/deposit/tests/loader/conftest.py
+++ b/swh/deposit/tests/loader/conftest.py
@@ -14,7 +14,6 @@
from swh.scheduler.tests.conftest import * # noqa
from swh.storage.tests.conftest import * # noqa
from swh.deposit.loader.checker import DepositChecker
-from swh.deposit.loader.loader import DepositLoader
@pytest.fixture(scope='session') # type: ignore # expected redefinition
@@ -52,11 +51,6 @@
return DepositChecker()
-@pytest.fixture
-def deposit_loader(swh_config):
- return DepositLoader()
-
-
@pytest.fixture
def requests_mock_datadir(datadir, requests_mock_datadir):
"""Override default behavior to deal with put method
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
deleted file mode 100644
--- a/swh/deposit/tests/loader/test_loader.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-from swh.deposit.config import (
- PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT
-)
-from django.urls import reverse
-from swh.model.hashutil import hash_to_bytes
-
-from .common import get_stats, check_snapshot
-
-
-def test_inject_deposit_ready(
- swh_config, requests_mock_datadir, datadir, deposit_loader):
- """Load a deposit which is ready
-
- """
- args = ['test', 999]
- archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
- deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
- deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
-
- # when
- res = deposit_loader.load(
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
- # then
- assert res['status'] == 'eventful'
- stats = get_stats(deposit_loader.storage)
-
- assert {
- 'content': 303,
- 'skipped_content': 0,
- 'directory': 12,
- 'origin': 1,
- 'origin_visit': 1,
- 'person': 1,
- 'release': 0,
- 'revision': 1,
- 'snapshot': 1,
- } == stats
-
- origin_url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
- rev_id = 'b1bef04d90ef3ba645df4c4f945748c173a4e9a2'
- dir_id = 'bed9acbf2a4502499f659e65a2ab77096bd46a1d'
-
- expected_revision = {
- 'author': {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'},
- 'committer': {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'},
- 'committer_date': {
- 'negative_utc': 'false',
- 'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1507389428}},
- 'date': {
- 'negative_utc': 'false',
- 'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1507389428}},
- 'message': b'test: Deposit 999 in collection test',
- 'metadata': {
- '@xmlns': ['http://www.w3.org/2005/Atom'],
- 'author': ['some awesome author', 'another one', 'no one'],
- 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
- 'external_identifier': 'some-external-id',
- 'url': origin_url,
- 'original_artifact': [
- {
- 'name': 'archive.zip',
- 'archive_type': 'tar',
- 'length': 725946,
- 'blake2s256': '04fffd328441d216c92492ad72d37388d8c77889880b069151298786fd48d889', # noqa
- 'sha256': '31e066137a962676e89f69d1b65382de95a7ef7d914b8cb956f41ea72e0f516b', # noqa
- 'sha1': 'f7bebf6f9c62a2295e889f66e05ce9bfaed9ace3',
- 'sha1_git': 'cae6b33cc33faafd2d6bd86c6b4273f9338c69c2'
- }
- ]
- },
- 'synthetic': True,
- 'type': 'tar',
- 'parents': [],
- 'directory': hash_to_bytes(dir_id),
- 'id': hash_to_bytes(rev_id),
- }
-
- rev = next(deposit_loader.storage.revision_get([hash_to_bytes(rev_id)]))
- assert rev is not None
- assert expected_revision == rev
-
- expected_snapshot = {
- 'id': '823109c16f9948c6f88cc5dec8e278da1487f06d',
- 'branches': {
- 'master': {
- 'target': rev_id,
- 'target_type': 'revision'
- }
- }
- }
-
- check_snapshot(expected_snapshot, deposit_loader.storage)
diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py
--- a/swh/deposit/tests/loader/test_tasks.py
+++ b/swh/deposit/tests/loader/test_tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,24 +6,6 @@
from unittest.mock import patch
-@patch('swh.deposit.loader.loader.DepositLoader.load')
-def deposit_load(loader, swh_config, swh_app, celery_session_worker):
- loader.return_value = {'status': 'eventful'}
-
- res = swh_app.send_task(
- 'swh.deposit.loader.tasks.LoadDepositArchiveTsk',
- args=('archive_url', 'deposit_meta_url', 'deposit_update_url'))
- assert res
- res.wait()
- assert res.successful()
-
- assert res.result == {'status': 'eventful'}
- loader.assert_called_once_with(
- archive_url='archive_url',
- deposit_meta_url='deposit_meta_url',
- deposit_update_url='deposit_update_url')
-
-
@patch('swh.deposit.loader.checker.DepositChecker.check')
def deposit_check(checker, swh_config, swh_app, celery_session_worker):
checker.return_value = {'status': 'uneventful'}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 1:08 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229399
Attached To
D2402: deposit.loader: Remove no longer used deprecated loader
Event Timeline
Log In to Comment