def set_original_artifact(*, revision, filepath, nature, hashes):
    """Return a copy of ``revision`` carrying ``original_artifact`` metadata.

    The input revision is deep-copied, so the caller's object is never
    modified. Other keys already present under ``metadata`` are kept; any
    existing non-empty ``original_artifact`` entry is replaced after a
    warning has been logged.

    Args:
        revision: revision mapping to annotate.
        filepath: path of the tarball; only its basename is recorded,
            under the ``name`` key.
        nature: value recorded under the ``archive_type`` key.
        hashes: mapping merged into the artifact entry after ``name`` and
            ``archive_type``.

    Returns:
        The annotated copy, whose ``metadata['original_artifact']`` is a
        one-element list holding the artifact entry.
    """
    updated = copy.deepcopy(revision)

    # Treat an absent, ``None`` or empty metadata value the same way:
    # start over from a fresh dict.
    has_metadata = 'metadata' in updated and bool(updated['metadata'])
    if not has_metadata:
        updated['metadata'] = {}
    metadata = updated['metadata']

    # Only a non-empty previous value is worth a warning.
    previous = (
        metadata['original_artifact']
        if 'original_artifact' in metadata
        else None
    )
    if previous:
        logger.warning(
            'Revision already contains original_artifact metadata, '
            'replacing: %r',
            previous,
        )

    artifact = {
        'name': os.path.basename(filepath),
        'archive_type': nature,
        **hashes,
    }
    metadata['original_artifact'] = [artifact]

    return updated
@@ -325,16 +321,12 @@ revision. """ - return { - **self.revision, - 'metadata': { - 'original_artifact': [{ - 'name': os.path.basename(filepath), - 'archive_type': nature, - **hashes, - }], - } - } + return set_original_artifact( + revision=self.revision, + filepath=filepath, + nature=nature, + hashes=hashes, + ) def build_snapshot(self, revision): """Build the snapshot targeting the revision. diff --git a/swh/loader/tar/tests/test_loader.py b/swh/loader/tar/tests/test_loader.py --- a/swh/loader/tar/tests/test_loader.py +++ b/swh/loader/tar/tests/test_loader.py @@ -85,6 +85,8 @@ self.assertCountReleases(0) self.assertCountSnapshots(1) + return actual_revision + class TestRemoteTarLoader(PrepareDataForTestLoader): """Test the remote loader scenario (local/remote) @@ -194,7 +196,6 @@ def test_load(self): """Load a local tarball should result in persisted swh data - """ # given origin = { @@ -226,6 +227,10 @@ 'type': revision_type, 'message': revision_message, 'synthetic': True, + 'metadata': { + 'foo': 'bar', + 'original_artifact': ['bogus_original_artifact'], + } } branch_name = os.path.basename(self.tarpath) @@ -236,4 +241,10 @@ branch_name=branch_name) # then - self.assert_data_ok() + actual_revision = self.assert_data_ok() + + # Check metadata passthrough + assert actual_revision['metadata']['foo'] == 'bar' + + # FIXME: use the caplog pytest fixture to check that the clobbering of + # original artifact sent a warning