diff --git a/PKG-INFO b/PKG-INFO index da7ca93a..1b74163f 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.50 +Version: 0.0.51 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index 9e5a868e..4d986c11 100644 --- a/debian/control +++ b/debian/control @@ -1,56 +1,56 @@ Source: swh-deposit Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-setuptools, python3-all, python3-nose, python3-django-nose, python3-vcversioner, python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), - python3-swh.loader.core (>= 0.0.30~), - python3-swh.loader.tar (>= 0.0.34~), + python3-swh.loader.core (>= 0.0.32~), + python3-swh.loader.tar (>= 0.0.35~), python3-swh.scheduler (>= 0.0.19~), python3-django, python3-click, python3-vcversioner, python3-djangorestframework, python3-djangorestframework-xml, python3-requests, python3-lxml, patool Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/source/swh-deposit/ Package: python3-swh.deposit Architecture: all Depends: python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), python3-swh.scheduler (>= 0.0.19~), patool, ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Server Package: python3-swh.deposit.client Architecture: all Depends: python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), python3-requests, python3-lxml, ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Api Client Package: python3-swh.deposit.loader Conflict: python3-swh.deposit.injection Architecture: all Depends: python3-swh.deposit.client (= ${binary:Version}), python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), - python3-swh.loader.core (>= 0.0.30~), - python3-swh.loader.tar (>= 0.0.34~), + python3-swh.loader.core (>= 0.0.32~), + python3-swh.loader.tar (>= 0.0.35~), python3-swh.scheduler (>= 0.0.19~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Loader diff --git a/requirements-swh.txt b/requirements-swh.txt index e141980d..4a51cbcf 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.36 -swh.loader.tar >= 0.0.34 -swh.loader.core >= 0.0.30 +swh.loader.tar >= 0.0.35 +swh.loader.core >= 0.0.32 swh.scheduler >= 0.0.19 swh.model >= 0.0.21 diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index da7ca93a..1b74163f 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.50 +Version: 0.0.51 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index c3cf6f23..dcc067a0 100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,11 +1,11 @@ Django click djangorestframework djangorestframework-xml lxml swh.core>=0.0.36 -swh.loader.core>=0.0.30 -swh.loader.tar>=0.0.34 +swh.loader.core>=0.0.32 +swh.loader.tar>=0.0.35 swh.model>=0.0.21 swh.scheduler>=0.0.19 vcversioner diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py index 3763a2e1..5e1601b2 100644 --- a/swh/deposit/loader/loader.py +++ b/swh/deposit/loader/loader.py @@ -1,129 +1,130 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import datetime import os import tempfile from swh.model import hashutil from swh.loader.tar import loader from swh.loader.core.loader import SWHLoader from ..client import PrivateApiDepositClient class DepositLoader(loader.TarLoader): """Deposit loader implementation. This is a subclass of the :class:TarLoader as the main goal of this class is to first retrieve the deposit's tarball contents as one and its associated metadata. Then provide said tarball to be loaded by the TarLoader. This will: - retrieves the deposit's archive locally - provide the archive to be loaded by the tar loader - clean up the temporary location used to retrieve the archive locally - update the deposit's status accordingly """ CONFIG_BASE_FILENAME = 'loader/deposit' ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'), } def __init__(self, client=None): super().__init__( logging_class='swh.deposit.loader.loader.DepositLoader') self.client = client if client else PrivateApiDepositClient() def load(self, *, archive_url, deposit_meta_url, deposit_update_url): return SWHLoader.load( self, archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) + def prepare_origin_visit(self, *, deposit_meta_url, **kwargs): + self.metadata = self.client.metadata_get( + deposit_meta_url, log=self.log) + self.origin = self.metadata['origin'] + self.visit_date = None + def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url): """Prepare the loading by first retrieving the deposit's raw archive content. """ self.deposit_update_url = deposit_update_url self.client.status_update(deposit_update_url, 'loading') temporary_directory = tempfile.TemporaryDirectory() self.temporary_directory = temporary_directory archive_path = os.path.join(temporary_directory.name, 'archive.zip') archive = self.client.archive_get( archive_url, archive_path, log=self.log) - metadata = self.client.metadata_get( - deposit_meta_url, log=self.log) - origin = metadata['origin'] - visit_date = datetime.datetime.now(tz=datetime.timezone.utc) + metadata = self.metadata revision = metadata['revision'] branch_name = metadata['branch_name'] self.origin_metadata = metadata['origin_metadata'] self.prepare_metadata() super().prepare(tar_path=archive, - origin=origin, - visit_date=visit_date, + origin=self.origin, revision=revision, branch_name=branch_name) def store_metadata(self): """Storing the origin_metadata during the load processus. Provider_id and tool_id are resolved during the prepare() method. """ origin_id = self.origin_id visit_date = self.visit_date provider_id = self.origin_metadata['provider']['provider_id'] tool_id = self.origin_metadata['tool']['tool_id'] metadata = self.origin_metadata['metadata'] try: self.send_origin_metadata(origin_id, visit_date, provider_id, tool_id, metadata) except Exception: self.log.exception('Problem when storing origin_metadata') raise def post_load(self, success=True): """Updating the deposit's status according to its loading status. If not successful, we update its status to 'failed'. Otherwise, we update its status to 'done' and pass along its associated revision. """ try: if not success: self.client.status_update(self.deposit_update_url, status='failed') return # first retrieve the new revision [rev_id] = self.objects['revision'].keys() if rev_id: rev_id_hex = hashutil.hash_to_hex(rev_id) # then update the deposit's status to success with its # revision-id self.client.status_update(self.deposit_update_url, status='done', revision_id=rev_id_hex) except Exception: self.log.exception( 'Problem when trying to update the deposit\'s status') def cleanup(self): """Clean up temporary directory where we retrieved the tarball. """ super().cleanup() self.temporary_directory.cleanup() diff --git a/version.txt b/version.txt index 23f6a1c0..f548cb09 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.50-0-gcd80d1c \ No newline at end of file +v0.0.51-0-g735b248 \ No newline at end of file