diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py index 17662c5..fd919f7 100644 --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -1,408 +1,404 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import email.utils import iso8601 import logging import re import subprocess from dateutil.parser import parse as parse_date from debian.changelog import Changelog from debian.deb822 import Dsc from os import path from typing import ( Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple ) from swh.loader.package.loader import PackageLoader from swh.loader.package.utils import download, release_name logger = logging.getLogger(__name__) UPLOADERS_SPLIT = re.compile(r'(?<=\>)\s*,\s*') class DebianLoader(PackageLoader): """Load debian origins into swh archive. """ visit_type = 'deb' def __init__(self, url: str, date: str, packages: Mapping[str, Any]): """Debian Loader implementation. Args: url: Origin url (e.g. deb://Debian/packages/cicero) date: Ignored packages: versioned packages and associated artifacts, example:: { 'stretch/contrib/0.7.2-3': { 'name': 'cicero', 'version': '0.7.2-3' 'files': { 'cicero_0.7.2-3.diff.gz': { 'md5sum': 'a93661b6a48db48d59ba7d26796fc9ce', 'name': 'cicero_0.7.2-3.diff.gz', 'sha256': 'f039c9642fe15c75bed5254315e2a29f...', 'size': 3964, 'uri': 'http://d.d.o/cicero_0.7.2-3.diff.gz', }, 'cicero_0.7.2-3.dsc': { 'md5sum': 'd5dac83eb9cfc9bb52a15eb618b4670a', 'name': 'cicero_0.7.2-3.dsc', 'sha256': '35b7f1048010c67adfd8d70e4961aefb...', 'size': 1864, 'uri': 'http://d.d.o/cicero_0.7.2-3.dsc', }, 'cicero_0.7.2.orig.tar.gz': { 'md5sum': '4353dede07c5728319ba7f5595a7230a', 'name': 'cicero_0.7.2.orig.tar.gz', 'sha256': '63f40f2436ea9f67b44e2d4bd669dbab...', 'size': 96527, 'uri': 'http://d.d.o/cicero_0.7.2.orig.tar.gz', } }, }, # ... } """ super().__init__(url=url) self.packages = packages def get_versions(self) -> Sequence[str]: """Returns the keys of the packages input (e.g. stretch/contrib/0.7.2-3, etc...) """ return list(self.packages.keys()) def get_package_info(self, version: str) -> Generator[ Tuple[str, Mapping[str, Any]], None, None]: meta = self.packages[version] p_info = meta.copy() p_info['raw'] = meta yield release_name(version), p_info def resolve_revision_from( self, known_package_artifacts: Mapping, artifact_metadata: Mapping) \ -> Optional[bytes]: return resolve_revision_from( known_package_artifacts, artifact_metadata) def download_package(self, p_info: Mapping[str, Any], tmpdir: str) -> List[Tuple[str, Mapping]]: """Contrary to other package loaders (1 package, 1 artifact), `a_metadata` represents the package's datafiles set to fetch: - .orig.tar.gz - .dsc - .diff.gz This is delegated to the `download_package` function. """ all_hashes = download_package(p_info, tmpdir) logger.debug('all_hashes: %s', all_hashes) res = [] for hashes in all_hashes.values(): res.append((tmpdir, hashes)) logger.debug('res: %s', res) return res def uncompress(self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], dest: str) -> str: logger.debug('dl_artifacts: %s', dl_artifacts) return extract_package(dl_artifacts, dest=dest) def build_revision(self, a_metadata: Mapping[str, Any], uncompressed_path: str) -> Dict: dsc_url, dsc_name = dsc_information(a_metadata) if not dsc_name: raise ValueError( 'dsc name for url %s should not be None' % dsc_url) dsc_path = path.join(path.dirname(uncompressed_path), dsc_name) i_metadata = get_package_metadata( a_metadata, dsc_path, uncompressed_path) logger.debug('i_metadata: %s', i_metadata) logger.debug('a_metadata: %s', a_metadata) msg = 'Synthetic revision for Debian source package %s version %s' % ( a_metadata['name'], a_metadata['version']) date = iso8601.parse_date(i_metadata['changelog']['date']) author = prepare_person(i_metadata['changelog']['person']) # inspired from swh.loader.debian.converters.package_metadata_to_revision # noqa return { 'type': 'dsc', 'message': msg.encode('utf-8'), 'author': author, 'date': date, 'committer': author, 'committer_date': date, 'parents': [], 'metadata': { 'intrinsic': { 'tool': 'dsc', 'raw': i_metadata, }, 'extrinsic': { 'provider': dsc_url, 'when': self.visit_date.isoformat(), 'raw': a_metadata, }, } } def resolve_revision_from(known_package_artifacts: Mapping, artifact_metadata: Mapping) -> Optional[bytes]: """Given known package artifacts (resolved from the snapshot of previous visit) and the new artifact to fetch, try to solve the corresponding revision. """ artifacts_to_fetch = artifact_metadata.get('files') if not artifacts_to_fetch: return None - logger.debug('k_p_artifacts: %s', known_package_artifacts) - logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch) + + def to_set(data): + return frozenset([ + (name, meta['sha256'], meta['size']) + for name, meta in data['files'].items() + ]) + + # what we want to avoid downloading back if we have them already + set_new_artifacts = to_set(artifact_metadata) + + known_artifacts_revision_id = {} for rev_id, known_artifacts in known_package_artifacts.items(): - logger.debug('Revision: %s', rev_id) - logger.debug('Associated known_artifacts: %s', known_artifacts) - known_artifacts = known_artifacts['extrinsic']['raw']['files'] - rev_found = True - for a_name, k_artifact in known_artifacts.items(): - artifact_to_fetch = artifacts_to_fetch.get(a_name) - logger.debug('artifact_to_fetch: %s', artifact_to_fetch) - if artifact_to_fetch is None: - # as soon as we do not see an artifact, we consider we need - # to check the other revision - rev_found = False - if k_artifact['sha256'] != artifact_to_fetch['sha256']: - # Hash is different, we consider we need to check the other - # revisions - rev_found = False - if rev_found: - logger.debug('Existing revision %s found for new artifacts.', - rev_id) - return rev_id - logger.debug('No existing revision found for the new artifacts.') - return None + extrinsic = known_artifacts.get('extrinsic') + if not extrinsic: + continue + + s = to_set(extrinsic['raw']) + known_artifacts_revision_id[s] = rev_id + + return known_artifacts_revision_id.get(set_new_artifacts) def uid_to_person(uid: str) -> Mapping[str, str]: """Convert an uid to a person suitable for insertion. Args: uid: an uid of the form "Name " Returns: a dictionary with the following keys: - name: the name associated to the uid - email: the mail associated to the uid - fullname: the actual uid input """ logger.debug('uid: %s', uid) ret = { 'name': '', 'email': '', 'fullname': uid, } name, mail = email.utils.parseaddr(uid) if name and email: ret['name'] = name ret['email'] = mail else: ret['name'] = uid return ret def prepare_person(person: Mapping[str, str]) -> Mapping[str, bytes]: """Prepare person for swh serialization... Args: A person dict Returns: A person dict ready for storage """ ret = {} for key, value in person.items(): ret[key] = value.encode('utf-8') return ret def download_package( package: Mapping[str, Any], tmpdir: Any) -> Mapping[str, Any]: """Fetch a source package in a temporary directory and check the checksums for all files. Args: package: Dict defining the set of files representing a debian package tmpdir: Where to download and extract the files to ingest Returns: Dict of swh hashes per filename key """ all_hashes = {} for filename, fileinfo in package['files'].items(): uri = fileinfo['uri'] logger.debug('fileinfo: %s', fileinfo) extrinsic_hashes = {'sha256': fileinfo['sha256']} logger.debug('extrinsic_hashes(%s): %s', filename, extrinsic_hashes) filepath, hashes = download(uri, dest=tmpdir, filename=filename, hashes=extrinsic_hashes) all_hashes[filename] = hashes logger.debug('all_hashes: %s', all_hashes) return all_hashes def dsc_information(package: Mapping[str, Any]) -> Tuple[ Optional[str], Optional[str]]: """Retrieve dsc information from a package. Args: package: Package metadata information Returns: Tuple of dsc file's uri, dsc's full disk path """ dsc_name = None dsc_url = None for filename, fileinfo in package['files'].items(): if filename.endswith('.dsc'): if dsc_name: raise ValueError( 'Package %s_%s references several dsc files.' % (package['name'], package['version']) ) dsc_url = fileinfo['uri'] dsc_name = filename return dsc_url, dsc_name def extract_package(dl_artifacts: List[Tuple[str, Mapping]], dest: str) -> str: """Extract a Debian source package to a given directory. Note that after extraction the target directory will be the root of the extracted package, rather than containing it. Args: package: package information dictionary dest: directory where the package files are stored Returns: Package extraction directory """ a_path = dl_artifacts[0][0] logger.debug('dl_artifacts: %s', dl_artifacts) for _, hashes in dl_artifacts: logger.debug('hashes: %s', hashes) filename = hashes['filename'] if filename.endswith('.dsc'): dsc_name = filename break dsc_path = path.join(a_path, dsc_name) destdir = path.join(dest, 'extracted') logfile = path.join(dest, 'extract.log') logger.debug('extract Debian source package %s in %s' % (dsc_path, destdir), extra={ 'swh_type': 'deb_extract', 'swh_dsc': dsc_path, 'swh_destdir': destdir, }) cmd = ['dpkg-source', '--no-copy', '--no-check', '--ignore-bad-version', '-x', dsc_path, destdir] try: with open(logfile, 'w') as stdout: subprocess.check_call(cmd, stdout=stdout, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: logdata = open(logfile, 'r').read() raise ValueError('dpkg-source exited with code %s: %s' % (e.returncode, logdata)) from None return destdir def get_package_metadata(package: Mapping[str, Any], dsc_path: str, extracted_path: str) -> Mapping[str, Any]: """Get the package metadata from the source package at dsc_path, extracted in extracted_path. Args: package: the package dict (with a dsc_path key) dsc_path: path to the package's dsc file extracted_path: the path where the package got extracted Returns: dict: a dictionary with the following keys: - history: list of (package_name, package_version) tuples parsed from the package changelog """ with open(dsc_path, 'rb') as dsc: parsed_dsc = Dsc(dsc) # Parse the changelog to retrieve the rest of the package information changelog_path = path.join(extracted_path, 'debian/changelog') with open(changelog_path, 'rb') as changelog: try: parsed_changelog = Changelog(changelog) except UnicodeDecodeError: logger.warning('Unknown encoding for changelog %s,' ' falling back to iso' % changelog_path, extra={ 'swh_type': 'deb_changelog_encoding', 'swh_name': package['name'], 'swh_version': str(package['version']), 'swh_changelog': changelog_path, }) # need to reset as Changelog scrolls to the end of the file changelog.seek(0) parsed_changelog = Changelog(changelog, encoding='iso-8859-15') package_info = { 'name': package['name'], 'version': str(package['version']), 'changelog': { 'person': uid_to_person(parsed_changelog.author), 'date': parse_date(parsed_changelog.date).isoformat(), 'history': [(block.package, str(block.version)) for block in parsed_changelog][1:], } } maintainers = [ uid_to_person(parsed_dsc['Maintainer']), ] maintainers.extend( uid_to_person(person) for person in UPLOADERS_SPLIT.split(parsed_dsc.get('Uploaders', '')) ) package_info['maintainers'] = maintainers return package_info diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py index 4db2d94..bc7257c 100644 --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -1,454 +1,468 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import logging import pytest +import random from os import path from swh.loader.package.debian.loader import ( DebianLoader, download_package, dsc_information, uid_to_person, prepare_person, get_package_metadata, extract_package ) from swh.loader.package.tests.common import check_snapshot, get_stats from swh.loader.package.debian.loader import resolve_revision_from logger = logging.getLogger(__name__) PACKAGE_FILES = { 'name': 'cicero', 'version': '0.7.2-3', 'files': { 'cicero_0.7.2-3.diff.gz': { 'md5sum': 'a93661b6a48db48d59ba7d26796fc9ce', 'name': 'cicero_0.7.2-3.diff.gz', 'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c', # noqa 'size': 3964, 'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.diff.gz' # noqa }, 'cicero_0.7.2-3.dsc': { 'md5sum': 'd5dac83eb9cfc9bb52a15eb618b4670a', 'name': 'cicero_0.7.2-3.dsc', 'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03', # noqa 'size': 1864, 'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc'}, # noqa 'cicero_0.7.2.orig.tar.gz': { 'md5sum': '4353dede07c5728319ba7f5595a7230a', 'name': 'cicero_0.7.2.orig.tar.gz', 'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786', # noqa 'size': 96527, 'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz' # noqa } }, } PACKAGE_FILES2 = { 'name': 'cicero', 'version': '0.7.2-4', 'files': { 'cicero_0.7.2-4.diff.gz': { 'md5sum': '1e7e6fc4a59d57c98082a3af78145734', 'name': 'cicero_0.7.2-4.diff.gz', 'sha256': '2e6fa296ee7005473ff58d0971f4fd325617b445671480e9f2cfb738d5dbcd01', # noqa 'size': 4038, 'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-4.diff.gz' # noqa }, 'cicero_0.7.2-4.dsc': { 'md5sum': '1a6c8855a73b4282bb31d15518f18cde', 'name': 'cicero_0.7.2-4.dsc', 'sha256': '913ee52f7093913420de5cbe95d63cfa817f1a1daf997961149501894e754f8b', # noqa 'size': 1881, 'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-4.dsc'}, # noqa 'cicero_0.7.2.orig.tar.gz': { 'md5sum': '4353dede07c5728319ba7f5595a7230a', 'name': 'cicero_0.7.2.orig.tar.gz', 'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786', # noqa 'size': 96527, 'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz' # noqa } } } PACKAGE_PER_VERSION = { 'stretch/contrib/0.7.2-3': PACKAGE_FILES, } PACKAGES_PER_VERSION = { 'stretch/contrib/0.7.2-3': PACKAGE_FILES, 'buster/contrib/0.7.2-4': PACKAGE_FILES2, } def test_debian_first_visit( swh_config, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot """ loader = DebianLoader( url='deb://Debian/packages/cicero', date='2019-10-12T05:58:09.165557+00:00', packages=PACKAGE_PER_VERSION) actual_load_status = loader.load() expected_snapshot_id = '3b6b66e6ee4e7d903a379a882684a2a50480c0b4' assert actual_load_status == { 'status': 'eventful', 'snapshot_id': expected_snapshot_id } stats = get_stats(loader.storage) assert { 'content': 42, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, # all artifacts under 1 revision 'skipped_content': 0, 'snapshot': 1 } == stats expected_snapshot = { 'id': expected_snapshot_id, 'branches': { 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', 'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07', } }, } # different than the previous loader as no release is done check_snapshot(expected_snapshot, loader.storage) def test_debian_first_visit_then_another_visit( swh_config, requests_mock_datadir): """With no prior visit, load a debian project ends up with 1 snapshot """ url = 'deb://Debian/packages/cicero' loader = DebianLoader( url=url, date='2019-10-12T05:58:09.165557+00:00', packages=PACKAGE_PER_VERSION) actual_load_status = loader.load() expected_snapshot_id = '3b6b66e6ee4e7d903a379a882684a2a50480c0b4' assert actual_load_status == { 'status': 'eventful', 'snapshot_id': expected_snapshot_id } origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' assert origin_visit['type'] == 'deb' stats = get_stats(loader.storage) assert { 'content': 42, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, # all artifacts under 1 revision 'skipped_content': 0, 'snapshot': 1 } == stats expected_snapshot = { 'id': expected_snapshot_id, 'branches': { 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', 'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07', } }, } # different than the previous loader as no release is done check_snapshot(expected_snapshot, loader.storage) # No change in between load actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' origin_visit2 = list(loader.storage.origin_visit_get(url)) assert origin_visit2[-1]['status'] == 'full' assert origin_visit2[-1]['type'] == 'deb' stats2 = get_stats(loader.storage) assert { 'content': 42 + 0, 'directory': 2 + 0, 'origin': 1, 'origin_visit': 1 + 1, # a new visit occurred 'person': 1, 'release': 0, 'revision': 1, 'skipped_content': 0, 'snapshot': 1, # same snapshot across 2 visits } == stats2 urls = [ m.url for m in requests_mock_datadir.request_history if m.url.startswith('http://deb.debian.org') ] # visited each package artifact twice across 2 visits assert len(urls) == len(set(urls)) def test_uid_to_person(): uid = 'Someone Name ' actual_person = uid_to_person(uid) assert actual_person == { 'name': 'Someone Name', 'email': 'someone@orga.org', 'fullname': uid, } def test_prepare_person(): actual_author = prepare_person({ 'name': 'Someone Name', 'email': 'someone@orga.org', 'fullname': 'Someone Name ', }) assert actual_author == { 'name': b'Someone Name', 'email': b'someone@orga.org', 'fullname': b'Someone Name ', } def test_download_package(datadir, tmpdir, requests_mock_datadir): tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) all_hashes = download_package(PACKAGE_FILES, tmpdir) assert all_hashes == { 'cicero_0.7.2-3.diff.gz': { 'checksums': { 'blake2s256': '08b1c438e70d2474bab843d826515147fa4a817f8c4baaf3ddfbeb5132183f21', # noqa 'sha1': '0815282053f21601b0ec4adf7a8fe47eace3c0bc', 'sha1_git': '834ac91da3a9da8f23f47004bb456dd5bd16fe49', 'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c' # noqa }, 'filename': 'cicero_0.7.2-3.diff.gz', 'length': 3964}, 'cicero_0.7.2-3.dsc': { 'checksums': { 'blake2s256': '8c002bead3e35818eaa9d00826f3d141345707c58fb073beaa8abecf4bde45d2', # noqa 'sha1': 'abbec4e8efbbc80278236e1dd136831eac08accd', 'sha1_git': '1f94b2086fa1142c2df6b94092f5c5fa11093a8e', 'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03' # noqa }, 'filename': 'cicero_0.7.2-3.dsc', 'length': 1864}, 'cicero_0.7.2.orig.tar.gz': { 'checksums': { 'blake2s256': '9809aa8d2e2dad7f34cef72883db42b0456ab7c8f1418a636eebd30ab71a15a6', # noqa 'sha1': 'a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43', 'sha1_git': 'aa0a38978dce86d531b5b0299b4a616b95c64c74', 'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786' # noqa }, 'filename': 'cicero_0.7.2.orig.tar.gz', 'length': 96527 } } def test_dsc_information_ok(): fname = 'cicero_0.7.2-3.dsc' dsc_url, dsc_name = dsc_information(PACKAGE_FILES) assert dsc_url == PACKAGE_FILES['files'][fname]['uri'] assert dsc_name == PACKAGE_FILES['files'][fname]['name'] def test_dsc_information_not_found(): fname = 'cicero_0.7.2-3.dsc' package_files = copy.deepcopy(PACKAGE_FILES) package_files['files'].pop(fname) dsc_url, dsc_name = dsc_information(package_files) assert dsc_url is None assert dsc_name is None def test_dsc_information_too_many_dsc_entries(): # craft an extra dsc file fname = 'cicero_0.7.2-3.dsc' package_files = copy.deepcopy(PACKAGE_FILES) data = package_files['files'][fname] fname2 = fname.replace('cicero', 'ciceroo') package_files['files'][fname2] = data with pytest.raises( ValueError, match='Package %s_%s references several dsc' % ( package_files['name'], package_files['version'])): dsc_information(package_files) def test_get_package_metadata(requests_mock_datadir, datadir, tmp_path): tmp_path = str(tmp_path) # py3.5 compat. package = PACKAGE_FILES logger.debug('package: %s', package) # download the packages all_hashes = download_package(package, tmp_path) # Retrieve information from package _, dsc_name = dsc_information(package) dl_artifacts = [(tmp_path, hashes) for hashes in all_hashes.values()] # Extract information from package extracted_path = extract_package(dl_artifacts, tmp_path) # Retrieve information on package dsc_path = path.join(path.dirname(extracted_path), dsc_name) actual_package_info = get_package_metadata( package, dsc_path, extracted_path) logger.debug('actual_package_info: %s', actual_package_info) assert actual_package_info == { 'changelog': { 'date': '2014-10-19T16:52:35+02:00', 'history': [ ('cicero', '0.7.2-2'), ('cicero', '0.7.2-1'), ('cicero', '0.7-1') ], 'person': { 'email': 'sthibault@debian.org', 'fullname': 'Samuel Thibault ', 'name': 'Samuel Thibault' } }, 'maintainers': [ { 'email': 'debian-accessibility@lists.debian.org', 'fullname': 'Debian Accessibility Team ' '', 'name': 'Debian Accessibility Team' }, { 'email': 'sthibault@debian.org', 'fullname': 'Samuel Thibault ', 'name': 'Samuel Thibault' } ], 'name': 'cicero', 'version': '0.7.2-3' } def test_debian_multiple_packages(swh_config, requests_mock_datadir): url = 'deb://Debian/packages/cicero' loader = DebianLoader( url=url, date='2019-10-12T05:58:09.165557+00:00', packages=PACKAGES_PER_VERSION) actual_load_status = loader.load() expected_snapshot_id = 'defc19021187f3727293121fcf6c5c82cb923604' assert actual_load_status == { 'status': 'eventful', 'snapshot_id': expected_snapshot_id } origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' assert origin_visit['type'] == 'deb' expected_snapshot = { 'id': expected_snapshot_id, 'branches': { 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', 'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07', }, 'releases/buster/contrib/0.7.2-4': { 'target_type': 'revision', 'target': '8224139c274c984147ef4b09aa0e462c55a10bd3', } }, } check_snapshot(expected_snapshot, loader.storage) def test_resolve_revision_from_edge_cases(): """Solving revision with empty data will result in unknown revision """ for package_artifacts in [{}, PACKAGE_FILES]: actual_revision = resolve_revision_from( package_artifacts, {}) assert actual_revision is None for known_artifacts in [{}, PACKAGE_FILES]: actual_revision = resolve_revision_from( {}, known_artifacts) assert actual_revision is None - expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa + known_package_artifacts = { + b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07": { + 'extrinsic': { + # empty + }, + # ... removed the unnecessary intermediary data + } + } + assert not resolve_revision_from(known_package_artifacts, PACKAGE_FILES) def test_resolve_revision_from_edge_cases_hit_and_miss(): """Solving revision with inconsistent data will result in unknown revision """ artifact_metadata = PACKAGE_FILES2 - expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa + expected_revision_id = b"(\x08\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xff\x85\x85O\xfe\xcf\x07" # noqa known_package_artifacts = { expected_revision_id: { 'extrinsic': { - 'provider': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc', # noqa 'raw': PACKAGE_FILES, - 'when': '2019-12-19T17:04:32.161965+00:00' }, # ... removed the unnecessary intermediary data } } actual_revision = resolve_revision_from( known_package_artifacts, artifact_metadata ) assert actual_revision is None def test_resolve_revision_from(): """Solving revision with consistent data will solve the revision """ artifact_metadata = PACKAGE_FILES expected_revision_id = b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa + + files = artifact_metadata['files'] + # shuffling dict's keys + keys = list(files.keys()) + random.shuffle(keys) + package_files = { + 'files': {k: files[k] for k in keys} + } + known_package_artifacts = { expected_revision_id: { 'extrinsic': { - 'provider': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc', # noqa - 'raw': PACKAGE_FILES, - 'when': '2019-12-19T17:04:32.161965+00:00' + 'raw': package_files, }, # ... removed the unnecessary intermediary data } } actual_revision = resolve_revision_from( known_package_artifacts, artifact_metadata ) assert actual_revision == expected_revision_id