diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -7,6 +7,7 @@ python3-all, python3-arrow, python3-requests, + python3-requests-mock, python3-pytest, python3-setuptools, python3-swh.core (>= 0.0.46~), diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1 +1,2 @@ pytest +requests-mock diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py --- a/swh/loader/tar/loader.py +++ b/swh/loader/tar/loader.py @@ -15,7 +15,7 @@ from swh.core import tarball from swh.loader.core.loader import BufferedLoader from swh.loader.dir.loader import revision_from, snapshot_from -from swh.model.hashutil import MultiHash +from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE from swh.model.from_disk import Directory from .build import compute_revision @@ -92,7 +92,7 @@ h = MultiHash(length=length) with open(filepath, 'wb') as f: - for chunk in response.iter_content(chunk_size=None): + for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE): h.update(chunk) f.write(chunk) diff --git a/swh/loader/tar/tests/test_loader.py b/swh/loader/tar/tests/test_loader.py --- a/swh/loader/tar/tests/test_loader.py +++ b/swh/loader/tar/tests/test_loader.py @@ -4,8 +4,8 @@ # See top-level LICENSE file for more information import os - import pytest +import requests_mock from swh.model import hashutil @@ -52,35 +52,7 @@ uncompress_archive=False) self.tarpath = self.destination_path - -class TestTarLoader1(PrepareDataForTestLoader): - """Test the remote loader - - """ - def setUp(self): - super().setUp() - self.loader = RemoteTarLoaderForTest() - self.storage = self.loader.storage - - @pytest.mark.fs - def test_load(self): - """Process a new tarball should be ok - - """ - # given - origin = { - 'url': self.repo_url, - 'type': 'tar' - } - - visit_date = 'Tue, 3 May 2016 17:16:32 +0200' - - last_modified = '2018-12-05T12:35:23+00:00' - - # when - self.loader.load( - origin=origin, visit_date=visit_date, last_modified=last_modified) - + def assert_data_ok(self): # then self.assertCountContents(8, "3 files + 5 links") self.assertCountDirectories(6, "4 subdirs + 1 empty + 1 main dir") @@ -113,6 +85,104 @@ self.assertCountSnapshots(1) +class TestRemoteTarLoader(PrepareDataForTestLoader): + """Test the remote loader scenario (local/remote) + + """ + def setUp(self): + super().setUp() + self.loader = RemoteTarLoaderForTest() + self.storage = self.loader.storage + + @pytest.mark.fs + def test_load_local(self): + """Load a local tarball should result in persisted swh data + + """ + # given + origin = { + 'url': self.repo_url, + 'type': 'tar' + } + + visit_date = 'Tue, 3 May 2016 17:16:32 +0200' + + last_modified = '2018-12-05T12:35:23+00:00' + + # when + self.loader.load( + origin=origin, visit_date=visit_date, last_modified=last_modified) + + # then + self.assert_data_ok() + + @requests_mock.Mocker() + def test_load_remote(self, mock_requests): + """Load a remote tarball should result in persisted swh data + + """ + # setup the mock to stream the content of the tarball + local_url = self.repo_url.replace('file:///', '/') + url = 'https://nowhere.org/%s' % local_url + with open(local_url, 'rb') as f: + data = f.read() + mock_requests.get(url, content=data, headers={ + 'content-length': str(len(data)) + }) + + # given + origin = { + 'url': url, + 'type': 'tar' + } + + visit_date = 'Tue, 3 May 2016 17:16:32 +0200' + + last_modified = '2018-12-05T12:35:23+00:00' + + # when + self.loader.load( + origin=origin, visit_date=visit_date, last_modified=last_modified) + + self.assert_data_ok() + + @requests_mock.Mocker() + def test_load_remote_download_failure(self, mock_requests): + """Load a remote tarball with download failure should result in no data + + """ + # setup the mock to stream the content of the tarball + local_url = self.repo_url.replace('file:///', '/') + url = 'https://nowhere.org/%s' % local_url + with open(local_url, 'rb') as f: + data = f.read() + wrong_length = len(data) - 10 + mock_requests.get(url, content=data, headers={ + 'content-length': str(wrong_length) + }) + + # given + origin = { + 'url': url, + 'type': 'tar' + } + + visit_date = 'Tue, 3 May 2016 17:16:32 +0200' + + last_modified = '2018-12-05T12:35:23+00:00' + + # when + r = self.loader.load( + origin=origin, visit_date=visit_date, + last_modified=last_modified) + + self.assertEqual(r, {'status': 'failed'}) + self.assertCountContents(0) + self.assertCountDirectories(0) + self.assertCountRevisions(0) + self.assertCountSnapshots(0) + + class TarLoaderForTest(TarLoader): def parse_config_file(self, *args, **kwargs): return TEST_CONFIG @@ -173,32 +243,4 @@ branch_name=branch_name) # then - self.assertCountContents(8, "3 files + 5 links") - self.assertCountDirectories(6, "4 subdirs + 1 empty + 1 main dir") - self.assertCountRevisions(1, "synthetic revision") - - rev_id = hashutil.hash_to_bytes( - '67a7d7dda748f9a86b56a13d9218d16f5cc9ab3d') - actual_revision = next(self.storage.revision_get([rev_id])) - self.assertTrue(actual_revision['synthetic']) - self.assertEqual(actual_revision['parents'], []) - self.assertEqual(actual_revision['type'], 'tar') - self.assertEqual(actual_revision['message'], - b'swh-loader-tar: synthetic revision message') - self.assertEqual(actual_revision['directory'], - b'\xa7A\xfcM\x96\x8c{\x8e<\x94\xff\x86\xe7\x04\x80\xc5\xc7\xe5r\xa9') # noqa - - self.assertEqual( - actual_revision['metadata']['original_artifact'][0], - { - 'sha1_git': 'cc848944a0d3e71d287027347e25467e61b07428', - 'archive_type': 'tar', - 'blake2s256': '5d70923443ad36377cd58e993aff0e3c1b9ef14f796c69569105d3a99c64f075', # noqa - 'name': 'sample-folder.tgz', - 'sha1': '3ca0d0a5c6833113bd532dc5c99d9648d618f65a', - 'length': 555, - 'sha256': '307ebda0071ca5975f618e192c8417161e19b6c8bf581a26061b76dc8e85321d' # noqa - }) - - self.assertCountReleases(0) - self.assertCountSnapshots(1) + self.assert_data_ok()