diff --git a/swh/loader/npm/client.py b/swh/loader/npm/client.py --- a/swh/loader/npm/client.py +++ b/swh/loader/npm/client.py @@ -176,11 +176,18 @@ self.package, version, expected_digest, actual_digest)) # uncompress tarball - tarball.uncompress(filepath, path) + tarball_invalid = False + try: + tarball.uncompress(filepath, path) + except Exception: + tarball_invalid = True # remove tarball os.remove(filepath) + if tarball_invalid: + return (None, None, None, None) + # do not archive useless tarball root directory package_path = os.path.join(path, 'package') # some old packages use a root directory with a different name diff --git a/swh/loader/npm/loader.py b/swh/loader/npm/loader.py --- a/swh/loader/npm/loader.py +++ b/swh/loader/npm/loader.py @@ -149,7 +149,7 @@ ret[(package['version'], package['sha1'])] = revision['id'] return ret - def _last_snapshot(self): + def last_snapshot(self): """ Retrieve the last snapshot of the npm package if any. """ @@ -176,7 +176,7 @@ self.package_load_status = 'uneventful' self.package_visit_status = 'full' - last_snapshot = self._last_snapshot() + last_snapshot = self.last_snapshot() self.known_versions = self._known_versions(last_snapshot) self.new_versions = \ @@ -208,6 +208,10 @@ package_metadata, author, package_source_data, dir_path = data + # package release tarball was corrupted + if package_metadata is None: + return not self.done + dir_path = dir_path.encode('utf-8') directory = Directory.from_disk(path=dir_path, data=True) objects = directory.collect() diff --git a/swh/loader/npm/tests/test_loader.py b/swh/loader/npm/tests/test_loader.py --- a/swh/loader/npm/tests/test_loader.py +++ b/swh/loader/npm/tests/test_loader.py @@ -8,6 +8,9 @@ import requests_mock +from unittest.mock import patch + +from swh.core import tarball from swh.loader.core.tests import BaseLoaderStorageTest from swh.loader.npm.loader import NpmLoader from swh.model.identifiers import snapshot_identifier @@ -352,3 +355,30 @@ self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') self.assertFalse(os.path.exists(self.loader.temp_directory)) + + @patch('swh.loader.npm.client.tarball') + def test_npm_loader_6_invalid_tarball(self, m, mock_tarball): + + def _tarball_uncompress(filepath, path): + if filepath.endswith('0.0.3.tgz'): + raise Exception('Invalid tarball !') + else: + tarball.uncompress(filepath, path) + + mock_tarball.uncompress.side_effect = _tarball_uncompress + + self.reset_loader() + init_test_data(m, package_metadata_file(package, visit=1), + package_metadata_url(package)) + self.loader.load(package, package_url(package), + package_metadata_url(package)) + + snapshot = self.loader.last_snapshot() + for branch, target in snapshot['branches'].items(): + if branch == b'releases/0.0.3': + self.assertTrue(target is None) + else: + self.assertTrue(target is not None) + + self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) + self.assertEqual(self.loader.visit_status(), 'partial')