diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -7,6 +7,7 @@
  python3-all,
  python3-arrow,
  python3-requests,
+ python3-requests-mock,
  python3-pytest,
  python3-setuptools,
  python3-swh.core (>= 0.0.46~),
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1 +1,2 @@
 pytest
+requests-mock
diff --git a/swh/loader/tar/loader.py b/swh/loader/tar/loader.py
--- a/swh/loader/tar/loader.py
+++ b/swh/loader/tar/loader.py
@@ -28,6 +28,8 @@
 
 TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.tar.'
 DEBUG_MODE = '** DEBUG MODE **'
+# Size (in bytes) of the chunks read from a response while hashing/writing it
+CHUNK_SIZE = 4096
 
 
 class LocalResponse:
@@ -92,7 +94,7 @@
 
         h = MultiHash(length=length)
         with open(filepath, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=None):
+            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                 h.update(chunk)
                 f.write(chunk)
 
diff --git a/swh/loader/tar/tests/test_loader.py b/swh/loader/tar/tests/test_loader.py
--- a/swh/loader/tar/tests/test_loader.py
+++ b/swh/loader/tar/tests/test_loader.py
@@ -4,8 +4,9 @@
 # See top-level LICENSE file for more information
 
 import os
 
 import pytest
+import requests_mock
 
 from swh.model import hashutil
 
@@ -53,7 +54,7 @@
         self.tarpath = self.destination_path
 
 
-class TestTarLoader1(PrepareDataForTestLoader):
+class TestBaseRemoteTarLoader(PrepareDataForTestLoader):
     """Test the remote loader
 
     """
@@ -62,9 +63,11 @@
         self.loader = RemoteTarLoaderForTest()
         self.storage = self.loader.storage
 
+
+class TestRemoteTarLoader(TestBaseRemoteTarLoader):
     @pytest.mark.fs
-    def test_load(self):
-        """Process a new tarball should be ok
+    def test_load_local(self):
+        """Loading a local tarball should result in persisted swh data
 
         """
         # given
@@ -113,6 +116,67 @@
         self.assertCountSnapshots(1)
 
 
+class TestRemoteTarLoader2(TestBaseRemoteTarLoader):
+    @pytest.mark.fs
+    @requests_mock.Mocker()
+    def test_load_remote(self, mock_requests):
+        """Loading a remote tarball should result in persisted swh data
+
+        """
+        # set up the mock to stream the content of the tarball
+        url = 'https://nowhere.org/%s' % self.repo_url
+        with open(self.repo_url.replace('file:///', '/'), 'rb') as f:
+            data = f.read()
+        mock_requests.get(url, content=data, headers={
+            'content-length': str(len(data))
+        })
+
+        # given
+        origin = {
+            'url': url,
+            'type': 'tar'
+        }
+
+        visit_date = 'Tue, 3 May 2016 17:16:32 +0200'
+
+        last_modified = '2018-12-05T12:35:23+00:00'
+
+        # when
+        self.loader.load(
+            origin=origin, visit_date=visit_date, last_modified=last_modified)
+
+        # then
+        self.assertCountContents(8, "3 files + 5 links")
+        self.assertCountDirectories(6, "4 subdirs + 1 empty + 1 main dir")
+        self.assertCountRevisions(1, "synthetic revision")
+
+        rev_id = hashutil.hash_to_bytes(
+            '67a7d7dda748f9a86b56a13d9218d16f5cc9ab3d')
+        actual_revision = next(self.storage.revision_get([rev_id]))
+        self.assertTrue(actual_revision['synthetic'])
+        self.assertEqual(actual_revision['parents'], [])
+        self.assertEqual(actual_revision['type'], 'tar')
+        self.assertEqual(actual_revision['message'],
+                         b'swh-loader-tar: synthetic revision message')
+        self.assertEqual(actual_revision['directory'],
+                         b'\xa7A\xfcM\x96\x8c{\x8e<\x94\xff\x86\xe7\x04\x80\xc5\xc7\xe5r\xa9')  # noqa
+
+        self.assertEqual(
+            actual_revision['metadata']['original_artifact'][0],
+            {
+                'sha1_git': 'cc848944a0d3e71d287027347e25467e61b07428',
+                'archive_type': 'tar',
+                'blake2s256': '5d70923443ad36377cd58e993aff0e3c1b9ef14f796c69569105d3a99c64f075',  # noqa
+                'name': 'sample-folder.tgz',
+                'sha1': '3ca0d0a5c6833113bd532dc5c99d9648d618f65a',
+                'length': 555,
+                'sha256': '307ebda0071ca5975f618e192c8417161e19b6c8bf581a26061b76dc8e85321d'  # noqa
+            })
+
+        self.assertCountReleases(0)
+        self.assertCountSnapshots(1)
+
+
 class TarLoaderForTest(TarLoader):
     def parse_config_file(self, *args, **kwargs):
         return TEST_CONFIG