diff --git a/.gitignore b/.gitignore --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ version.txt build/ dist/ +.tox diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -24,9 +24,9 @@ - retrieves the associated artifact archive (with checks) - uncompresses locally the archive - computes the hashes of the uncompressed directory -- then creates a revision (using PKG-INFO metadata file) - targetting such directory -- finally, creates a snapshot targetting all seen revisions +- then creates a revision (using PKG-INFO metadata file) targeting + such directory +- finally, creates a snapshot targeting all seen revisions (uncompressed PyPI artifact and metadata). ## Next visit diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -11,7 +11,7 @@ python3-requests, python3-setuptools, python3-swh.core, - python3-swh.loader.core, + python3-swh.loader.core (>= 0.0.34~), python3-swh.model (>= 0.0.27~), python3-swh.storage, python3-swh.scheduler, @@ -22,7 +22,7 @@ Package: python3-swh.loader.pypi Architecture: all Depends: python3-swh.core, - python3-swh.loader.core, + python3-swh.loader.core (>= 0.0.34~), python3-swh.model (>= 0.0.27~), python3-swh.storage, ${misc:Depends}, ${python3:Depends} diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -2,4 +2,4 @@ swh.model >= 0.0.27 swh.storage swh.scheduler -swh.loader.core +swh.loader.core >= 0.0.34 diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py deleted file mode 100644 --- a/swh/loader/pypi/model.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import os -import logging -import shutil - -from .converters import info, author diff --git a/swh/loader/pypi/tests/common.py b/swh/loader/pypi/tests/common.py --- a/swh/loader/pypi/tests/common.py +++ b/swh/loader/pypi/tests/common.py @@ -26,101 +26,6 @@ cache=True, cache_dir=cache_dir) -class LoaderNoStorage: - """Mixin class to inhibit the persistence (storage calls) and keep in - memory the data sent. - - """ - CONFIG_BASE_FILENAME = '' # do not provide a real path - ADDITIONAL_CONFIG = { - 'storage': ('dict', { - 'cls': 'remote', - 'args': { - 'url': 'http://nowhere:5002/', # do not provide a real storage - } - }), - - # do not send any data to the storage - 'send_contents': ('bool', False), - 'send_directories': ('bool', False), - 'send_revisions': ('bool', False), - 'send_releases': ('bool', False), - 'send_snapshot': ('bool', False), - 'debug': ('bool', False), - } - - def __init__(self, client=None): - super().__init__(client=client) - self.all_contents = [] - self.all_directories = [] - self.all_revisions = [] - self.all_releases = [] - self.all_snapshots = [] - - # typed data - self.objects = { - 'content': self.all_contents, - 'directory': self.all_directories, - 'revision': self.all_revisions, - 'release': self.all_releases, - 'snapshot': self.all_snapshots - } - - def _add(self, type, l): - """Add without duplicates and keeping the insertion order. - - Args: - type (str): Type of objects concerned by the action - l ([object]): List of 'type' object - - """ - col = self.objects[type] - for o in l: - if o in col: - continue - col.extend([o]) - - def maybe_load_contents(self, all_contents): - self._add('content', all_contents) - - def maybe_load_directories(self, all_directories): - self._add('directory', all_directories) - - def maybe_load_revisions(self, all_revisions): - self._add('revision', all_revisions) - - def maybe_load_releases(self, releases): - raise ValueError('If called, the test must break.') - - def maybe_load_snapshot(self, snapshot): - self.objects['snapshot'].append(snapshot) - - def _store_origin_visit(self): - pass - - def open_fetch_history(self): - pass - - def close_fetch_history_success(self, fetch_history_id): - pass - - def close_fetch_history_failure(self, fetch_history_id): - pass - - def update_origin_visit(self, origin_id, visit, status): - pass - - # Override to do nothing at the end - def close_failure(self): - pass - - def close_success(self): - pass - - def pre_cleanup(self): - pass - - @attr('fs') class WithProjectTest(TestCase): def setUp(self): diff --git a/swh/loader/pypi/tests/test_loader.py b/swh/loader/pypi/tests/test_loader.py --- a/swh/loader/pypi/tests/test_loader.py +++ b/swh/loader/pypi/tests/test_loader.py @@ -9,13 +9,13 @@ from nose.plugins.attrib import attr from nose.tools import istest -from unittest import TestCase from swh.model import hashutil +from swh.loader.core.tests import BaseLoaderTest, LoaderNoStorage from swh.loader.pypi.client import PyPIProject from swh.loader.pypi.loader import PyPILoader -from .common import PyPIClientWithCache, RESOURCES_PATH, LoaderNoStorage +from .common import PyPIClientWithCache, RESOURCES_PATH class TestPyPILoader(LoaderNoStorage, PyPILoader): @@ -31,11 +31,11 @@ with open(project_metadata_file) as f: data = json.load(f) - temp_dir = tempfile.mkdtemp( + self.temp_dir = tempfile.mkdtemp( dir='/tmp/', prefix='swh.loader.pypi.tests-') # Will use the pypi with cache client = PyPIClientWithCache( - temp_directory=temp_dir, cache_dir=RESOURCES_PATH) + temp_directory=self.temp_dir, cache_dir=RESOURCES_PATH) super().__init__(client=client) self.project = PyPIProject( client=client, @@ -53,7 +53,7 @@ @attr('fs') -class BaseLoaderITest(TestCase): +class PyPIBaseLoaderTest(BaseLoaderTest): """Loader Test Mixin to prepare the pypi to 'load' in a test context. In this setup, the loader uses the cache to load data so no @@ -62,83 +62,13 @@ """ def setUp(self, project_name='0805nexter', dummy_pypi_instance='https://dummy.org'): - self.tmp_root_path = tempfile.mkdtemp() + self.tmp_root_path = tempfile.mkdtemp( + dir='/tmp', prefix='swh.loader.pypi.tests-') self._project = project_name self._origin_url = '%s/pypi/%s/' % (dummy_pypi_instance, project_name) self._project_metadata_url = '%s/pypi/%s/json' % ( dummy_pypi_instance, project_name) - def tearDown(self): - shutil.rmtree(self.tmp_root_path) - - def assertContentsOk(self, expected_contents): - contents = self.loader.all_contents - self.assertEquals(len(contents), len(expected_contents)) - - for content in contents: - content_id = hashutil.hash_to_hex(content['sha1']) - self.assertIn(content_id, expected_contents) - - def assertDirectoriesOk(self, expected_directories): - directories = self.loader.all_directories - self.assertEquals(len(directories), len(expected_directories)) - - for _dir in directories: - _dir_id = hashutil.hash_to_hex(_dir['id']) - self.assertIn(_dir_id, expected_directories) - - def assertSnapshotOk(self, expected_snapshot, expected_branches): - snapshots = self.loader.all_snapshots - self.assertEqual(len(snapshots), 1) - - snap = snapshots[0] - snap_id = hashutil.hash_to_hex(snap['id']) - self.assertEqual(snap_id, expected_snapshot) - - def decode_target(target): - if not target: - return target - target_type = target['target_type'] - - if target_type == 'alias': - decoded_target = target['target'].decode('utf-8') - else: - decoded_target = hashutil.hash_to_hex(target['target']) - - return { - 'target': decoded_target, - 'target_type': target_type - } - - branches = { - branch.decode('utf-8'): decode_target(target) - for branch, target in snap['branches'].items() - } - self.assertEqual(expected_branches, branches) - - def assertRevisionsOk(self, expected_revisions): # noqa: N802 - """Check the loader's revisions match the expected revisions. - - Expects self.loader to be instantiated and ready to be - inspected (meaning the loading took place). - - Args: - expected_revisions (dict): Dict with key revision id, - value the targeted directory id. - - """ - # The last revision being the one used later to start back from - for rev in self.loader.all_revisions: - rev_id = hashutil.hash_to_hex(rev['id']) - directory_id = hashutil.hash_to_hex(rev['directory']) - - self.assertEquals(expected_revisions[rev_id], directory_id) - - -# Define loaders with no storage -# They'll just accumulate the data in place -# Only for testing purposes. - class PyPILoaderNoSnapshot(TestPyPILoader): """Same as TestPyPILoader with no prior snapshot seen @@ -148,7 +78,7 @@ return None -class LoaderITest(BaseLoaderITest): +class LoaderITest(PyPIBaseLoaderTest): def setUp(self, project_name='0805nexter', dummy_pypi_instance='https://dummy.org'): super().setUp(project_name, dummy_pypi_instance) @@ -164,15 +94,13 @@ self._project, self._origin_url, self._project_metadata_url) # then - self.assertEquals(len(self.loader.all_contents), 6, - '3 contents per release artifact files (2)') - self.assertEquals(len(self.loader.all_directories), 4) - self.assertEquals(len(self.loader.all_revisions), 2, - '2 releases so 2 revisions should be created') - self.assertEquals(len(self.loader.all_releases), 0, - 'No release is created in the pypi loader') - self.assertEquals(len(self.loader.all_snapshots), 1, - 'Only 1 snapshot targetting all revisions') + self.assertCountContents( + 6, '3 contents per release artifact files (2)') + self.assertCountDirectories(4) + self.assertCountRevisions( + 2, '2 releases so 2 revisions should be created') + self.assertCountReleases(0, 'No release is created in the pypi loader') + self.assertCountSnapshots(1, 'Only 1 snapshot targeting all revisions') expected_contents = [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', @@ -264,7 +192,7 @@ } -class LoaderNoNewChangesSinceLastVisitITest(BaseLoaderITest): +class LoaderNoNewChangesSinceLastVisitITest(PyPIBaseLoaderTest): """This scenario makes use of the incremental nature of the loader. If nothing changes in between visits, the snapshot for the visit @@ -286,11 +214,11 @@ self._project, self._origin_url, self._project_metadata_url) # then - self.assertEquals(len(self.loader.all_contents), 0) - self.assertEquals(len(self.loader.all_directories), 0) - self.assertEquals(len(self.loader.all_revisions), 0) - self.assertEquals(len(self.loader.all_releases), 0) - self.assertEquals(len(self.loader.all_snapshots), 1) + self.assertCountContents(0) + self.assertCountDirectories(0) + self.assertCountRevisions(0) + self.assertCountReleases(0) + self.assertCountSnapshots(1) self.assertContentsOk([]) self.assertDirectoriesOk([]) @@ -320,7 +248,7 @@ self.assertEqual(self.loader.visit_status(), 'full') -class LoaderNewChangesSinceLastVisitITest(BaseLoaderITest): +class LoaderNewChangesSinceLastVisitITest(PyPIBaseLoaderTest): """In this scenario, a visit has already taken place. An existing snapshot exists. @@ -331,7 +259,7 @@ The visit results in a new snapshot. The new snapshot shares the same history as prior visit's - snapshot. It holds a new branch targetting the new revision. + snapshot. It holds a new branch targeting the new revision. """ def setUp(self, project_name='0805nexter', @@ -351,15 +279,13 @@ self._project, self._origin_url, self._project_metadata_url) # then - self.assertEquals( - len(self.loader.all_contents), 4, + self.assertCountContents(4, "3 + 1 new content (only change between 1.2.0 and 1.3.0 archives)") - self.assertEquals(len(self.loader.all_directories), 2) - self.assertEquals( - len(self.loader.all_revisions), 1, - "This results in 1 new revision targetting that new directory id") - self.assertEquals(len(self.loader.all_releases), 0) - self.assertEquals(len(self.loader.all_snapshots), 1) + self.assertCountDirectories(2) + self.assertCountRevisions( + 1, "1 new revision targeting that new directory id") + self.assertCountReleases(0) + self.assertCountSnapshots(1) expected_contents = [ '92689fa2b7fb4d4fc6fb195bf73a50c87c030639', # new one @@ -464,7 +390,7 @@ } -class LoaderChangesOldReleaseArtifactRemovedSinceLastVisit(BaseLoaderITest): +class LoaderChangesOldReleaseArtifactRemovedSinceLastVisit(PyPIBaseLoaderTest): """In this scenario, a visit has already taken place. An existing snapshot exists. @@ -497,15 +423,13 @@ self._project, self._origin_url, self._project_metadata_url) # then - self.assertEquals( - len(self.loader.all_contents), 4, + self.assertCountContents(4, "3 + 1 new content (only change between 1.3.0 and 1.4.0 archives)") - self.assertEquals(len(self.loader.all_directories), 2) - self.assertEquals( - len(self.loader.all_revisions), 1, - "This results in 1 new revision targetting that new directory id") - self.assertEquals(len(self.loader.all_releases), 0) - self.assertEquals(len(self.loader.all_snapshots), 1) + self.assertCountDirectories(2) + self.assertCountRevisions(1, + "This results in 1 new revision targeting that new directory id") + self.assertCountReleases(0) + self.assertCountSnapshots(1) expected_contents = [ 'e2d68a197e3a3ad0fc6de28749077892c2148043', # new one