diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
index cc50a2a..98e6b66 100644
--- a/swh/loader/pypi/loader.py
+++ b/swh/loader/pypi/loader.py
@@ -1,304 +1,307 @@
 # Copyright (C) 2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import os
 import shutil

 from tempfile import mkdtemp

 import arrow

 from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.core.loader import SWHLoader
 from swh.model.from_disk import Directory
 from swh.model.identifiers import (
     revision_identifier, snapshot_identifier,
     identifier_to_bytes, normalize_timestamp
 )

 from .client import PyPIClient, PyPIProject


 TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'
 DEBUG_MODE = '** DEBUG MODE **'


 class PyPILoader(SWHLoader):
     CONFIG_BASE_FILENAME = 'loader/pypi'
     ADDITIONAL_CONFIG = {
         'temp_directory': ('str', '/tmp/swh.loader.pypi/'),
         'cache': ('bool', False),
         'cache_dir': ('str', ''),
         'debug': ('bool', False),  # NOT FOR PRODUCTION
     }

     def __init__(self, client=None):
         super().__init__(logging_class='swh.loader.pypi.PyPILoader')
         self.origin_id = None
         if not client:
             temp_directory = self.config['temp_directory']
             os.makedirs(temp_directory, exist_ok=True)
             self.temp_directory = mkdtemp(
                 suffix='-%s' % os.getpid(),
                 prefix=TEMPORARY_DIR_PREFIX_PATTERN,
                 dir=temp_directory)
             self.pypi_client = PyPIClient(
                 temp_directory=self.temp_directory,
                 cache=self.config['cache'],
                 cache_dir=self.config['cache_dir'])
         else:
             self.temp_directory = client.temp_directory
             self.pypi_client = client
         self.debug = self.config['debug']
         self.done = False

     def pre_cleanup(self):
         """To prevent disk explosion if some other workers exploded
            in mid-air (OOM killed), we try and clean up dangling files.

         """
         if self.debug:
             self.log.warn('%s Will not pre-clean up temp dir %s' % (
                 DEBUG_MODE, self.temp_directory
             ))
             return
         clean_dangling_folders(self.config['temp_directory'],
                                pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
                                log=self.log)

     def cleanup(self):
         """Clean up temporary disk use

         """
         if self.debug:
             self.log.warn('%s Will not clean up temp dir %s' % (
                 DEBUG_MODE, self.temp_directory
             ))
             return
         if os.path.exists(self.temp_directory):
             self.log.debug('Clean up %s' % self.temp_directory)
             shutil.rmtree(self.temp_directory)

     def prepare_origin_visit(self, project_name, origin_url,
                              origin_metadata_url=None):
         """Prepare the origin visit information

         Args:
             project_name (str): Project's simple name
             origin_url (str): Project's main url
             origin_metadata_url (str): Project's metadata url

         """
         self.origin = {
             'url': origin_url,
             'type': 'pypi',
         }
         self.visit_date = None  # loader core will populate it

     def _known_artifacts(self, last_snapshot):
         """Retrieve the known release artifacts for the origin_id.

         Args:
             last_snapshot (dict): Last snapshot for the visit

         Returns:
             dict mapping (filename, sha256) tuples to the
             corresponding revision id
""" if not last_snapshot or 'branches' not in last_snapshot: return {} revs = [rev['target'] for rev in last_snapshot['branches'].values()] known_revisions = self.storage.revision_get(revs) ret = {} for revision in known_revisions: if 'original_artifact' in revision['metadata']: artifact = revision['metadata']['original_artifact'] ret[artifact['filename'], artifact['sha256']] = revision['id'] return ret def _last_snapshot(self): """Retrieve the last snapshot """ return self.storage.snapshot_get_latest(self.origin_id) def prepare(self, project_name, origin_url, origin_metadata_url=None): """Keep reference to the origin url (project) and the project metadata url Args: project_name (str): Project's simple name origin_url (str): Project's main url origin_metadata_url (str): Project's metadata url """ self.project_name = project_name self.origin_url = origin_url self.origin_metadata_url = origin_metadata_url self.project = PyPIProject(self.pypi_client, self.project_name, self.origin_metadata_url) self._prepare_state() def _prepare_state(self): """Initialize internal state (snapshot, contents, directories, etc...) This is called from `prepare` method. """ last_snapshot = self._last_snapshot() self.known_artifacts = self._known_artifacts(last_snapshot) # and the artifacts # that will be the source of data to retrieve self.new_artifacts = self.project.download_new_releases( self.known_artifacts ) # temporary state self._contents = [] self._directories = [] self._revisions = [] self._load_status = 'uneventful' self._visit_status = 'full' def fetch_data(self): """Called once per release artifact version (can be many for one release). This will for each call: - retrieve a release artifact (associated to a release version) - Uncompress it and compute the necessary information - Computes the swh objects Returns: True as long as data to fetch exist """ data = None if self.done: return False try: data = next(self.new_artifacts) self._load_status = 'eventful' except StopIteration: self.done = True return False project_info, author, release, artifact, dir_path = data dir_path = dir_path.encode('utf-8') directory = Directory.from_disk(path=dir_path, data=True) _objects = directory.collect() self._contents = _objects['content'].values() self._directories = _objects['directory'].values() date = normalize_timestamp( int(arrow.get(artifact['date']).timestamp)) name = release['name'].encode('utf-8') message = release['message'].encode('utf-8') if message: message = b'%s: %s' % (name, message) else: message = name _revision = { 'synthetic': True, 'metadata': { 'original_artifact': artifact, 'project': project_info, }, 'author': author, 'date': date, 'committer': author, 'committer_date': date, 'message': message, 'directory': directory.hash, 'parents': [], 'type': 'tar', } _revision['id'] = identifier_to_bytes( revision_identifier(_revision)) self._revisions.append(_revision) artifact_key = artifact['filename'], artifact['sha256'] self.known_artifacts[artifact_key] = _revision['id'] return not self.done def target_from_artifact(self, filename, sha256): target = self.known_artifacts.get((filename, sha256)) if target: return { 'target': target, 'target_type': 'revision', } return None def generate_and_load_snapshot(self): branches = {} for release, artifacts in self.project.all_release_artifacts().items(): default_release = self.project.default_release() if len(artifacts) == 1: # Only one artifact for this release, generate a single branch - branch_name = 'refs/tags/%s' % release + branch_name = 'releases/%s' % 
                 filename, sha256 = artifacts[0]
                 target = self.target_from_artifact(filename, sha256)
                 branches[branch_name.encode('utf-8')] = target
                 if release == default_release:
-                    branches[b'HEAD'] = target
+                    branches[b'HEAD'] = {
+                        'target_type': 'alias',
+                        'target': branch_name.encode('utf-8'),
+                    }
                 if not target:
                     self._visit_status = 'partial'
             else:
                 # Several artifacts for this release, generate a separate
                 # pointer for each of them
                 for filename, sha256 in artifacts:
-                    branch_name = 'refs/tags/%s/%s' % (release, filename)
+                    branch_name = 'releases/%s/%s' % (release, filename)
                     target = self.target_from_artifact(filename, sha256)
                     branches[branch_name.encode('utf-8')] = target
                     if not target:
                         self._visit_status = 'partial'

         snapshot = {
             'branches': branches,
         }
         snapshot['id'] = identifier_to_bytes(
             snapshot_identifier(snapshot))
         self.maybe_load_snapshot(snapshot)

     def store_data(self):
         """(override) This sends collected objects to storage.

         """
         self.maybe_load_contents(self._contents)
         self.maybe_load_directories(self._directories)
         self.maybe_load_revisions(self._revisions)

         if self.done:
             self.generate_and_load_snapshot()
             self.flush()

     def load_status(self):
         return {
             'status': self._load_status,
         }

     def visit_status(self):
         return self._visit_status


 if __name__ == '__main__':
     import logging
     import sys

     logging.basicConfig(level=logging.DEBUG)
     if len(sys.argv) != 2:
         logging.error('Usage: %s <module-name>' % sys.argv[0])
         sys.exit(1)
     module_name = sys.argv[1]

     loader = PyPILoader()
     loader.load(
         module_name,
         'https://pypi.org/projects/%s/' % module_name,
         'https://pypi.org/pypi/%s/json' % module_name,
     )
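
The substance of the loader change above is the snapshot layout: release branches move
from 'refs/tags/<version>' to 'releases/<version>', and HEAD stops duplicating the
default release's revision id, becoming an alias to that branch instead. A minimal
sketch of the resulting structure, reusing the same swh.model helpers the loader
imports (the revision id is the 1.2.0 one from the test expectations below; the
two-branch snapshot itself is a toy example, so its id matches nothing in the tests):

    from swh.model.identifiers import identifier_to_bytes, snapshot_identifier

    snapshot = {
        'branches': {
            b'releases/1.2.0': {
                'target': bytes.fromhex(
                    'e445da4da22b31bfebb6ffc4383dbf839a074d21'),
                'target_type': 'revision',
            },
            # HEAD now names the default release branch instead of
            # repeating its revision id
            b'HEAD': {
                'target': b'releases/1.2.0',
                'target_type': 'alias',
            },
        },
    }
    snapshot['id'] = identifier_to_bytes(snapshot_identifier(snapshot))

Recording HEAD as an alias preserves *which* branch was the default release at visit
time, rather than only the revision it happened to resolve to.
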
diff --git a/swh/loader/pypi/tests/test_loader.py b/swh/loader/pypi/tests/test_loader.py
index b4a341c..2b521f3 100644
--- a/swh/loader/pypi/tests/test_loader.py
+++ b/swh/loader/pypi/tests/test_loader.py
@@ -1,546 +1,556 @@
 # Copyright (C) 2016-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import shutil
 import tempfile

 from nose.plugins.attrib import attr
 from nose.tools import istest
 from unittest import TestCase

 from swh.model import hashutil

 from swh.loader.pypi.client import PyPIProject
 from swh.loader.pypi.loader import PyPILoader

 from .common import PyPIClientWithCache, RESOURCES_PATH, LoaderNoStorage


 class TestPyPILoader(LoaderNoStorage, PyPILoader):
     """Real PyPILoader for test purposes (storage and pypi interactions
        inhibited)

     """
     def __init__(self, project_name, json_filename=None):
         if not json_filename:  # defaulting to using same name as project
             json_filename = '%s.json' % project_name
         project_metadata_file = '%s/%s' % (RESOURCES_PATH, json_filename)
         project_metadata_url = 'https://pypi.org/pypi/%s/json' % project_name
         with open(project_metadata_file) as f:
             data = json.load(f)

         temp_dir = tempfile.mkdtemp(
             dir='/tmp/', prefix='swh.loader.pypi.tests-')

         # Will use the pypi with cache
         client = PyPIClientWithCache(
             temp_directory=temp_dir, cache_dir=RESOURCES_PATH)
         super().__init__(client=client)
         self.project = PyPIProject(
             client=client,
             project=project_name,
             project_metadata_url=project_metadata_url,
             data=data)

     def prepare(self, project_name, origin_url, origin_metadata_url=None):
         self.project_name = project_name
         self.origin_url = origin_url
         self.origin_metadata_url = origin_metadata_url
         self.visit = 1  # first visit
         self._prepare_state()


 @attr('fs')
 class BaseLoaderITest(TestCase):
     """Loader Test Mixin to prepare the pypi to 'load' in a test context.

     In this setup, the loader uses the cache to load data so there is
     no network interaction (no storage, no pypi).

     """
     def setUp(self, project_name='0805nexter',
               dummy_pypi_instance='https://dummy.org'):
         self.tmp_root_path = tempfile.mkdtemp()
         self._project = project_name
         self._origin_url = '%s/pypi/%s/' % (dummy_pypi_instance, project_name)
         self._project_metadata_url = '%s/pypi/%s/json' % (
             dummy_pypi_instance, project_name)

     def tearDown(self):
         shutil.rmtree(self.tmp_root_path)

     def assertContentsOk(self, expected_contents):
         contents = self.loader.all_contents
         self.assertEquals(len(contents), len(expected_contents))
         for content in contents:
             content_id = hashutil.hash_to_hex(content['sha1'])
             self.assertIn(content_id, expected_contents)

     def assertDirectoriesOk(self, expected_directories):
         directories = self.loader.all_directories
         self.assertEquals(len(directories), len(expected_directories))
         for _dir in directories:
             _dir_id = hashutil.hash_to_hex(_dir['id'])
             self.assertIn(_dir_id, expected_directories)

     def assertSnapshotOk(self, expected_snapshot, expected_branches):
         snapshots = self.loader.all_snapshots
         self.assertEqual(len(snapshots), 1)

         snap = snapshots[0]
         snap_id = hashutil.hash_to_hex(snap['id'])
         self.assertEqual(snap_id, expected_snapshot)

+        def decode_target(target):
+            if not target:
+                return target
+            target_type = target['target_type']
+
+            if target_type == 'alias':
+                decoded_target = target['target'].decode('utf-8')
+            else:
+                decoded_target = hashutil.hash_to_hex(target['target'])
+
+            return {
+                'target': decoded_target,
+                'target_type': target_type
+            }
+
         branches = {
-            branch.decode('utf-8'): {
-                'target': hashutil.hash_to_hex(t['target']),
-                'target_type': t['target_type'],
-            } if t else t
-            for branch, t in snap['branches'].items()
+            branch.decode('utf-8'): decode_target(target)
+            for branch, target in snap['branches'].items()
         }
         self.assertEqual(expected_branches, branches)

     def assertRevisionsOk(self, expected_revisions):  # noqa: N802
         """Check the loader's revisions match the expected revisions.

         Expects self.loader to be instantiated and ready to be
         inspected (meaning the loading took place).

         Args:
             expected_revisions (dict): Dict with key revision id,
             value the targeted directory id.

         """
         # The last revision being the one used later to start back from
         for rev in self.loader.all_revisions:
             rev_id = hashutil.hash_to_hex(rev['id'])
             directory_id = hashutil.hash_to_hex(rev['directory'])

             self.assertEquals(expected_revisions[rev_id], directory_id)


 # Define loaders with no storage
 # They'll just accumulate the data in place
 # Only for testing purposes.


 class PyPILoaderNoSnapshot(TestPyPILoader):
     """Same as TestPyPILoader with no prior snapshot seen

     """
     def _last_snapshot(self):
         return None


 class LoaderITest(BaseLoaderITest):
     def setUp(self, project_name='0805nexter',
               dummy_pypi_instance='https://dummy.org'):
         super().setUp(project_name, dummy_pypi_instance)
         self.loader = PyPILoaderNoSnapshot(project_name=project_name)

     @istest
     def load(self):
         """Load a pypi origin

         """
         # when
         self.loader.load(
             self._project, self._origin_url, self._project_metadata_url)

         # then
         self.assertEquals(len(self.loader.all_contents), 6,
                           '3 contents per release artifact files (2)')
         self.assertEquals(len(self.loader.all_directories), 4)
         self.assertEquals(len(self.loader.all_revisions), 2,
                           '2 releases so 2 revisions should be created')
         self.assertEquals(len(self.loader.all_releases), 0,
                           'No release is created in the pypi loader')
         self.assertEquals(len(self.loader.all_snapshots), 1,
                           'Only 1 snapshot targeting all revisions')

         expected_contents = [
             'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
             '938c33483285fd8ad57f15497f538320df82aeb8',
             'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
             '405859113963cb7a797642b45f171d6360425d16',
             'e5686aa568fdb1d19d7f1329267082fe40482d31',
             '83ecf6ec1114fd260ca7a833a2d165e71258c338',
         ]

         self.assertContentsOk(expected_contents)

         expected_directories = [
             '05219ba38bc542d4345d5638af1ed56c7d43ca7d',
             'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
             'b178b66bd22383d5f16f4f5c923d39ca798861b4',
             'c3a58f8b57433a4b56caaa5033ae2e0931405338',
         ]
         self.assertDirectoriesOk(expected_directories)

         # {revision hash: directory hash}
         expected_revisions = {
             '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d',  # noqa
             'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4',  # noqa
         }
         self.assertRevisionsOk(expected_revisions)

         expected_branches = {
-            'refs/tags/1.1.0': {
+            'releases/1.1.0': {
                 'target': '4c99891f93b81450385777235a37b5e966dd1571',
                 'target_type': 'revision',
             },
-            'refs/tags/1.2.0': {
+            'releases/1.2.0': {
                 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
                 'target_type': 'revision',
             },
             'HEAD': {
-                'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
-                'target_type': 'revision',
+                'target': 'releases/1.2.0',
+                'target_type': 'alias',
             },
         }

-        self.assertSnapshotOk('5c99339508de5eea67eecd3e0ba1efa8e804b96e',
+        self.assertSnapshotOk('ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a',
                               expected_branches)
         self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
         self.assertEqual(self.loader.visit_status(), 'full')


 class PyPILoaderWithSnapshot(TestPyPILoader):
     """This loader provides a snapshot and lists corresponding seen
        release artifacts.
""" def _last_snapshot(self): """Return last visited snapshot""" return { - 'id': b'\\\x993\x95\x08\xde^\xeag\xee\xcd>\x0b\xa1\xef\xa8\xe8\x04\xb9n', # noqa + 'id': b'\xban\x15\x8a\xdau\xd0\xb3\xcf\xb2\t\xff\xdfm\xaaN\xd3J"z', # noqa 'branches': { - b'refs/tags/1.1.0': { + b'releases/1.1.0': { 'target': b'L\x99\x89\x1f\x93\xb8\x14P' b'8Ww#Z7\xb5\xe9f\xdd\x15q', 'target_type': 'revision' }, - b'refs/tags/1.2.0': { + b'releases/1.2.0': { 'target': b'\xe4E\xdaM\xa2+1\xbf' b'\xeb\xb6\xff\xc48=\xbf\x83' b'\x9a\x07M!', 'target_type': 'revision' }, b'HEAD': { - 'target': b'\xe4E\xdaM\xa2+1\xbf' - b'\xeb\xb6\xff\xc48=\xbf\x83' - b'\x9a\x07M!', - 'target_type': 'revision' + 'target': b'releases/1.2.0', + 'target_type': 'alias' }, }, } def _known_artifacts(self, last_snapshot): """List corresponding seen release artifacts""" return { ( '0805nexter-1.1.0.zip', '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035' # noqa ): b'L\x99\x89\x1f\x93\xb8\x14P8Ww#Z7\xb5\xe9f\xdd\x15q', ( '0805nexter-1.2.0.zip', '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709' # noqa ): b'\xe4E\xdaM\xa2+1\xbf\xeb\xb6\xff\xc48=\xbf\x83\x9a\x07M!', } class LoaderNoNewChangesSinceLastVisitITest(BaseLoaderITest): """This scenario makes use of the incremental nature of the loader. If nothing changes in between visits, the snapshot for the visit must stay the same as the first visit. """ def setUp(self, project_name='0805nexter', dummy_pypi_instance='https://dummy.org'): super().setUp(project_name, dummy_pypi_instance) self.loader = PyPILoaderWithSnapshot(project_name=project_name) @istest def load(self): """Load a PyPI origin without new changes results in 1 same snapshot """ # when self.loader.load( self._project, self._origin_url, self._project_metadata_url) # then self.assertEquals(len(self.loader.all_contents), 0) self.assertEquals(len(self.loader.all_directories), 0) self.assertEquals(len(self.loader.all_revisions), 0) self.assertEquals(len(self.loader.all_releases), 0) self.assertEquals(len(self.loader.all_snapshots), 1) self.assertContentsOk([]) self.assertDirectoriesOk([]) self.assertRevisionsOk(expected_revisions={}) - expected_snapshot_id = '5c99339508de5eea67eecd3e0ba1efa8e804b96e' + expected_snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' expected_branches = { - 'refs/tags/1.1.0': { + 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, - 'refs/tags/1.2.0': { + 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { - 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', - 'target_type': 'revision', + 'target': 'releases/1.2.0', + 'target_type': 'alias', }, } self.assertSnapshotOk(expected_snapshot_id, expected_branches) _id = hashutil.hash_to_hex(self.loader._last_snapshot()['id']) self.assertEquals(expected_snapshot_id, _id) self.assertEqual(self.loader.load_status(), {'status': 'uneventful'}) self.assertEqual(self.loader.visit_status(), 'full') class LoaderNewChangesSinceLastVisitITest(BaseLoaderITest): """In this scenario, a visit has already taken place. An existing snapshot exists. This time, the PyPI project has changed, a new release (with 1 new release artifact) has been uploaded. The old releases did not change. The visit results in a new snapshot. The new snapshot shares the same history as prior visit's snapshot. It holds a new branch targetting the new revision. 
""" def setUp(self, project_name='0805nexter', dummy_pypi_instance='https://dummy.org'): super().setUp(project_name, dummy_pypi_instance) self.loader = PyPILoaderWithSnapshot( project_name=project_name, json_filename='0805nexter+new-made-up-release.json') @istest def load(self): """Load a PyPI origin with changes results in 1 new snapshot """ # when self.loader.load( self._project, self._origin_url, self._project_metadata_url) # then self.assertEquals( len(self.loader.all_contents), 4, "3 + 1 new content (only change between 1.2.0 and 1.3.0 archives)") self.assertEquals(len(self.loader.all_directories), 2) self.assertEquals( len(self.loader.all_revisions), 1, "This results in 1 new revision targetting that new directory id") self.assertEquals(len(self.loader.all_releases), 0) self.assertEquals(len(self.loader.all_snapshots), 1) expected_contents = [ '92689fa2b7fb4d4fc6fb195bf73a50c87c030639', # new one '405859113963cb7a797642b45f171d6360425d16', '83ecf6ec1114fd260ca7a833a2d165e71258c338', 'e5686aa568fdb1d19d7f1329267082fe40482d31', ] self.assertContentsOk(expected_contents) expected_directories = [ 'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a', '52604d46843b898f5a43208045d09fcf8731631b', ] self.assertDirectoriesOk(expected_directories) expected_revisions = { 'fb46e49605b0bbe69f8c53d315e89370e7c6cb5d': 'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a', # noqa } self.assertRevisionsOk(expected_revisions) old_revisions = { '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d', # noqa 'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4', # noqa } for rev, dir_id in old_revisions.items(): expected_revisions[rev] = dir_id - expected_snapshot_id = 'c1500c9fae5c275f5a772105c36884dd01bc26e7' + expected_snapshot_id = '07322209e51618410b5e43ca4af7e04fe5113c9d' expected_branches = { - 'refs/tags/1.1.0': { + 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, - 'refs/tags/1.2.0': { + 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, - 'refs/tags/1.3.0': { + 'releases/1.3.0': { 'target': 'fb46e49605b0bbe69f8c53d315e89370e7c6cb5d', 'target_type': 'revision', }, 'HEAD': { - 'target': 'fb46e49605b0bbe69f8c53d315e89370e7c6cb5d', - 'target_type': 'revision', + 'target': 'releases/1.3.0', + 'target_type': 'alias', }, } self.assertSnapshotOk(expected_snapshot_id, expected_branches) _id = hashutil.hash_to_hex(self.loader._last_snapshot()['id']) self.assertNotEqual(expected_snapshot_id, _id) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class PyPILoaderWithSnapshot2(TestPyPILoader): """This loader provides a snapshot and lists corresponding seen release artifacts. 
""" def _last_snapshot(self): """Return last visited snapshot""" return { - 'id': b"\xc1P\x0c\x9f\xae\\'_Zw!\x05\xc3h\x84\xdd\x01\xbc&\xe7", # noqa + 'id': b'\x072"\t\xe5\x16\x18A\x0b^C\xcaJ\xf7\xe0O\xe5\x11<\x9d', # noqa 'branches': { - b'refs/tags/1.1.0': { + b'releases/1.1.0': { 'target': b'L\x99\x89\x1f\x93\xb8\x14P8Ww#Z7\xb5\xe9f\xdd\x15q', # noqa 'target_type': 'revision' }, - b'refs/tags/1.2.0': { + b'releases/1.2.0': { 'target': b'\xe4E\xdaM\xa2+1\xbf\xeb\xb6\xff\xc48=\xbf\x83\x9a\x07M!', # noqa 'target_type': 'revision' }, - b'refs/tags/1.3.0': { + b'releases/1.3.0': { 'target': b'\xfbF\xe4\x96\x05\xb0\xbb\xe6\x9f\x8cS\xd3\x15\xe8\x93p\xe7\xc6\xcb]', # noqa 'target_type': 'revision' }, b'HEAD': { - 'target': b'\xfbF\xe4\x96\x05\xb0\xbb\xe6\x9f\x8cS\xd3\x15\xe8\x93p\xe7\xc6\xcb]', # noqa - 'target_type': 'revision' + 'target': b'releases/1.3.0', # noqa + 'target_type': 'alias' }, } } def _known_artifacts(self, last_snapshot): """Map previously seen release artifacts to their revision""" return { ( '0805nexter-1.1.0.zip', '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035' # noqa ): b'L\x99\x89\x1f\x93\xb8\x14P8Ww#Z7\xb5\xe9f\xdd\x15q', ( '0805nexter-1.2.0.zip', '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709' # noqa ): b'\xe4E\xdaM\xa2+1\xbf\xeb\xb6\xff\xc48=\xbf\x83\x9a\x07M!', ( '0805nexter-1.3.0.zip', '7097c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1' # noqa ): b'\xfbF\xe4\x96\x05\xb0\xbb\xe6\x9f\x8cS\xd3\x15\xe8\x93p\xe7\xc6\xcb]', # noqa } class LoaderChangesOldReleaseArtifactRemovedSinceLastVisit(BaseLoaderITest): """In this scenario, a visit has already taken place. An existing snapshot exists. The PyPI project has changed: - a new release has been uploaded - an older one has been removed The visit should result in a new snapshot. 

     Such a snapshot shares some of the same branches as the prior
     visit's (but not all):

     - the new release artifact branch exists
     - the old release artifact branch has been removed
     - the other, unchanged release artifact branches are left untouched

     """
     def setUp(self, project_name='0805nexter',
               dummy_pypi_instance='https://dummy.org'):
         super().setUp(project_name, dummy_pypi_instance)
         self.loader = PyPILoaderWithSnapshot2(
             project_name=project_name,
             json_filename='0805nexter-unpublished-release.json')

     @istest
     def load(self):
         """Load PyPI origin with removed artifact + changes ~> 1 new snapshot

         """
         # when
         self.loader.load(
             self._project, self._origin_url, self._project_metadata_url)

         # then
         self.assertEquals(
             len(self.loader.all_contents), 4,
             "3 + 1 new content (only change between 1.3.0 and 1.4.0 archives)")
         self.assertEquals(len(self.loader.all_directories), 2)
         self.assertEquals(
             len(self.loader.all_revisions), 1,
             "This results in 1 new revision targeting that new directory id")
         self.assertEquals(len(self.loader.all_releases), 0)
         self.assertEquals(len(self.loader.all_snapshots), 1)

         expected_contents = [
             'e2d68a197e3a3ad0fc6de28749077892c2148043',  # new one
             '405859113963cb7a797642b45f171d6360425d16',
             '83ecf6ec1114fd260ca7a833a2d165e71258c338',
             'e5686aa568fdb1d19d7f1329267082fe40482d31',
         ]

         self.assertContentsOk(expected_contents)

         expected_directories = [
             'a2b7621f3e52eb3632657f6e3436bd08202db56f',  # new one
             '770e21215ecac53cea331d8ea4dc0ffc9d979367',
         ]
         self.assertDirectoriesOk(expected_directories)

         expected_revisions = {
             # 1.4.0
             '5e91875f096ac48c98d74acf307439a3490f2827': '770e21215ecac53cea331d8ea4dc0ffc9d979367',  # noqa
         }
         self.assertRevisionsOk(expected_revisions)

-        expected_snapshot_id = 'ddc940677eda1ca270c8b7e8d7796f8f98428522'
+        expected_snapshot_id = 'bb0b0c29040678eadb6dae9e43e496cc860123e4'
         expected_branches = {
-            'refs/tags/1.2.0': {
+            'releases/1.2.0': {
                 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
                 'target_type': 'revision',
             },
-            'refs/tags/1.3.0': {
+            'releases/1.3.0': {
                 'target': 'fb46e49605b0bbe69f8c53d315e89370e7c6cb5d',
                 'target_type': 'revision',
             },
-            'refs/tags/1.4.0': {
+            'releases/1.4.0': {
                 'target': '5e91875f096ac48c98d74acf307439a3490f2827',
                 'target_type': 'revision',
             },
             'HEAD': {
-                'target': '5e91875f096ac48c98d74acf307439a3490f2827',
-                'target_type': 'revision',
+                'target': 'releases/1.4.0',
+                'target_type': 'alias',
             },
         }

         self.assertSnapshotOk(expected_snapshot_id, expected_branches)
         _id = hashutil.hash_to_hex(self.loader._last_snapshot()['id'])
         self.assertNotEqual(expected_snapshot_id, _id)
         self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
         self.assertEqual(self.loader.visit_status(), 'full')
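
For readers tracing the test changes: the new decode_target helper in assertSnapshotOk
is what lets the expected branches mix hex revision ids with plain alias names. A small
illustration of the two decode paths, using values taken from the expectations above
(this snippet is not part of the diff):

    from swh.model import hashutil

    revision_branch = {
        'target': bytes.fromhex('e445da4da22b31bfebb6ffc4383dbf839a074d21'),
        'target_type': 'revision',
    }
    alias_branch = {'target': b'releases/1.2.0', 'target_type': 'alias'}

    # revision targets decode to their hex id ...
    assert (hashutil.hash_to_hex(revision_branch['target'])
            == 'e445da4da22b31bfebb6ffc4383dbf839a074d21')
    # ... while alias targets decode to the branch name they point at
    assert alias_branch['target'].decode('utf-8') == 'releases/1.2.0'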