diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
index 1ee6a2a..40b943d 100644
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -1,369 +1,370 @@
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import datetime
import hashlib
import logging
import pytest

from swh.model.hashutil import hash_to_bytes
from swh.loader.core.loader import BufferedLoader, UnbufferedLoader

from . import BaseLoaderTest


class DummyLoader:
    def cleanup(self):
        pass

    def prepare(self):
        pass

    def fetch_data(self):
        pass

    def store_data(self):
        pass

    def prepare_origin_visit(self):
        origin = self.storage.origin_get(
            self._test_prepare_origin_visit_data['origin'])
        self.origin = origin
        self.origin_url = origin['url']
        self.visit_date = datetime.datetime.utcnow()
-        self.storage.origin_visit_add(origin['id'], self.visit_date)
+        self.storage.origin_visit_add(origin['id'], self.visit_date,
+                                      origin['type'])

    def parse_config_file(self, *args, **kwargs):
        return {
            'storage': {
                'cls': 'memory',
                'args': {
                }
            },
            'send_contents': True,
            'send_directories': True,
            'send_revisions': True,
            'send_releases': True,
            'send_snapshot': True,
            'content_packet_size': 2,
            'content_packet_size_bytes': 8,
            'directory_packet_size': 2,
            'revision_packet_size': 2,
            'release_packet_size': 2,
            'content_size_limit': 10000,
        }


class DummyUnbufferedLoader(DummyLoader, UnbufferedLoader):
    pass


class DummyBufferedLoader(DummyLoader, BufferedLoader):
    pass


class DummyBaseLoaderTest(BaseLoaderTest):
    def setUp(self):
        self.loader = self.loader_class(logging_class='dummyloader')
        self.loader.visit_type = 'git'
        # do not call voluntarily super().setUp()
        self.storage = self.loader.storage
        contents = [
            {
                'sha1': '34973274ccef6ab4dfaaf86599792fa9c3fe4689',
                'sha1_git': b'bar1',
                'sha256': b'baz1',
                'blake2s256': b'qux1',
                'status': 'visible',
                'data': b'data1',
                'length': 5,
            },
            {
                'sha1': '61c2b3a30496d329e21af70dd2d7e097046d07b7',
                'sha1_git': b'bar2',
                'sha256': b'baz2',
                'blake2s256': b'qux2',
                'status': 'visible',
                'data': b'data2',
                'length': 5,
            },
        ]
        self.expected_contents = [content['sha1'] for content in contents]
        self.in_contents = contents.copy()
        for content in self.in_contents:
            content['sha1'] = hash_to_bytes(content['sha1'])
        self.in_directories = [
            {'id': hash_to_bytes(id_), 'entries': []}
            for id_ in [
                '44e45d56f88993aae6a0198013efa80716fd8921',
                '54e45d56f88993aae6a0198013efa80716fd8920',
                '43e45d56f88993aae6a0198013efa80716fd8920',
            ]
        ]
        person = {
            'name': b'John Doe',
            'email': b'john.doe@institute.org',
            'fullname': b'John Doe <john.doe@institute.org>'
        }
        rev1_id = b'\x00'*20
        rev2_id = b'\x01'*20
        self.in_revisions = [
            {
                'id': rev1_id,
                'type': 'git',
                'date': 1567591673,
                'committer_date': 1567591673,
                'author': person,
                'committer': person,
                'message': b'msg1',
                'directory': None,
                'synthetic': False,
                'metadata': None,
                'parents': [],
            },
            {
                'id': rev2_id,
                'type': 'hg',
                'date': 1567591673,
                'committer_date': 1567591673,
                'author': person,
                'committer': person,
                'message': b'msg2',
                'directory': None,
                'synthetic': False,
                'metadata': None,
                'parents': [],
            },
        ]
        self.in_releases = [
            {
                'name': b'rel1',
                'id': b'\x02'*20,
                'date': None,
                'author': person,
                'target_type': 'revision',
                'target': rev1_id,
                'message': None,
                'synthetic': False,
            },
            {
                'name': b'rel2',
                'id': b'\x03'*20,
                'date': None,
                'author': person,
                'target_type': 'revision',
                'target': rev2_id,
                'message': None,
                'synthetic': False,
            },
        ]
        self.in_origin = {
            'type': self.loader.visit_type,
            'url': 'http://example.com/',
        }
        self.in_snapshot = {
            'id': b'snap1',
            'branches': {},
        }
        self.in_provider = {
            'provider_name': 'Test Provider',
            'provider_type': 'test_provider',
            'provider_url': 'http://example.org/metadata_provider',
            'metadata': {'working': True},
        }
        self.in_tool = {
            'name': 'Test Tool',
            'version': 'v1.2.3',
            'configuration': {'in_the_Matrix': 'maybe'},
        }

        self.storage.origin_add([self.in_origin])

        # used by prepare_origin_visit() when it gets called
        self.loader._test_prepare_origin_visit_data = {
            'origin': self.in_origin,
        }

    def tearDown(self):
        # do not call voluntarily super().tearDown()
        pass


class CoreUnbufferedLoaderTest(DummyBaseLoaderTest):
    loader_class = DummyUnbufferedLoader

    def test_unbuffered_loader(self):
        self.loader.load()  # initialize the loader

        self.loader.send_contents(self.in_contents[0:1])
        self.loader.send_directories(self.in_directories[0:1])
        self.loader.send_revisions(self.in_revisions[0:1])
        self.loader.send_releases(self.in_releases[0:1])

        self.assertCountContents(1)
        self.assertCountDirectories(1)
        self.assertCountRevisions(1)
        self.assertCountReleases(1)

        self.loader.send_contents(self.in_contents[1:])
        self.loader.send_directories(self.in_directories[1:])
        self.loader.send_revisions(self.in_revisions[1:])
        self.loader.send_releases(self.in_releases[1:])

        self.assertCountContents(len(self.in_contents))
        self.assertCountDirectories(len(self.in_directories))
        self.assertCountRevisions(len(self.in_revisions))
        self.assertCountReleases(len(self.in_releases))


class CoreBufferedLoaderTest(DummyBaseLoaderTest):
    loader_class = DummyBufferedLoader

    def test_buffered_loader(self):
        self.loader.load()  # initialize the loader

        self.loader.maybe_load_contents(self.in_contents[0:1])
        self.loader.maybe_load_directories(self.in_directories[0:1])
        self.loader.maybe_load_revisions(self.in_revisions[0:1])
        self.loader.maybe_load_releases(self.in_releases[0:1])

        self.assertCountContents(0)
        self.assertCountDirectories(0)
        self.assertCountRevisions(0)
        self.assertCountReleases(0)

        self.loader.maybe_load_contents(self.in_contents[1:])
        self.loader.maybe_load_directories(self.in_directories[1:])
        self.loader.maybe_load_revisions(self.in_revisions[1:])
        self.loader.maybe_load_releases(self.in_releases[1:])

        self.assertCountContents(len(self.in_contents))
        self.assertCountDirectories(len(self.in_directories))
        self.assertCountRevisions(len(self.in_revisions))
        self.assertCountReleases(len(self.in_releases))

    def test_directory_cascade(self):
        """Checks that sending a directory triggers sending contents"""
        self.loader.load()  # initialize the loader

        self.loader.maybe_load_contents(self.in_contents[0:1])
        self.loader.maybe_load_directories(self.in_directories)

        self.assertCountContents(1)
        self.assertCountDirectories(len(self.in_directories))

    def test_revision_cascade(self):
        """Checks that sending a revision triggers sending contents
        and directories."""
        self.loader.load()  # initialize the loader

        self.loader.maybe_load_contents(self.in_contents[0:1])
        self.loader.maybe_load_directories(self.in_directories[0:1])
        self.loader.maybe_load_revisions(self.in_revisions)

        self.assertCountContents(1)
        self.assertCountDirectories(1)
        self.assertCountRevisions(len(self.in_revisions))

    def test_release_cascade(self):
        """Checks that sending a release triggers sending revisions,
        contents, and directories."""
        self.loader.load()  # initialize the loader

        self.loader.maybe_load_contents(self.in_contents[0:1])
        self.loader.maybe_load_directories(self.in_directories[0:1])
        self.loader.maybe_load_revisions(self.in_revisions[0:1])
        self.loader.maybe_load_releases(self.in_releases)

        self.assertCountContents(1)
        self.assertCountDirectories(1)
        self.assertCountRevisions(1)
        self.assertCountReleases(len(self.in_releases))

    def test_snapshot_cascade(self):
        """Checks that sending a snapshot triggers sending releases,
        revisions, contents, and directories."""
        self.loader.load()  # initialize the loader

        self.loader.maybe_load_contents(self.in_contents[0:1])
        self.loader.maybe_load_directories(self.in_directories[0:1])
        self.loader.maybe_load_revisions(self.in_revisions[0:1])
        self.loader.maybe_load_releases(self.in_releases[0:1])
        self.loader.maybe_load_snapshot(self.in_snapshot)

        self.assertCountContents(1)
        self.assertCountDirectories(1)
        self.assertCountRevisions(1)
        self.assertCountReleases(1)
        self.assertCountSnapshots(1)

    def test_origin_metadata(self):
        self.loader.load()

        provider_id = self.loader.send_provider(self.in_provider)
        tool_id = self.loader.send_tool(self.in_tool)

        self.loader.send_origin_metadata(
            self.loader.visit_date, provider_id, tool_id,
            {'test_metadata': 'foobar'})

        self.assertOriginMetadataContains(
            self.in_origin['type'], self.in_origin['url'],
            {'test_metadata': 'foobar'})

        with self.assertRaises(AssertionError):
            self.assertOriginMetadataContains(
                self.in_origin['type'], self.in_origin['url'],
                {'test_metadata': 'foobarbaz'})

        with self.assertRaises(Exception):
            self.assertOriginMetadataContains(
                self.in_origin['type'], self.in_origin['url'] + 'blah',
                {'test_metadata': 'foobar'})


def test_loader_logger_default_name():
    loader = DummyBufferedLoader()
    assert isinstance(loader.log, logging.Logger)
    assert loader.log.name == \
        'swh.loader.core.tests.test_loader.DummyBufferedLoader'

    loader = DummyUnbufferedLoader()
    assert isinstance(loader.log, logging.Logger)
    assert loader.log.name == \
        'swh.loader.core.tests.test_loader.DummyUnbufferedLoader'


def test_loader_logger_with_name():
    loader = DummyBufferedLoader('some.logger.name')
    assert isinstance(loader.log, logging.Logger)
    assert loader.log.name == \
        'some.logger.name'


@pytest.mark.fs
def test_loader_save_data_path(tmp_path):
    loader = DummyBufferedLoader('some.logger.name.1')
    url = 'http://bitbucket.org/something'
    loader.origin = {
        'url': url,
    }
    loader.visit_date = datetime.datetime(year=2019, month=10, day=1)
    loader.config = {
        'save_data_path': tmp_path,
    }

    hash_url = hashlib.sha1(url.encode('utf-8')).hexdigest()
    expected_save_path = '%s/sha1:%s/%s/2019' % (
        str(tmp_path), hash_url[0:2], hash_url
    )

    save_path = loader.get_save_data_path()
    assert save_path == expected_save_path
diff --git a/swh/loader/package/tests/common.py b/swh/loader/package/tests/common.py
index 062b90a..36cfc18 100644
--- a/swh/loader/package/tests/common.py
+++ b/swh/loader/package/tests/common.py
@@ -1,126 +1,127 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from os import path
import logging

from typing import Dict, List, Tuple

from swh.model.hashutil import hash_to_bytes, hash_to_hex

logger = logging.getLogger(__file__)

DATADIR = path.join(path.abspath(path.dirname(__file__)), 'resources')


def decode_target(target):
    """Test helper to ease readability in test

    """
    if not target:
        return target
    target_type = target['target_type']

    if target_type == 'alias':
        decoded_target = target['target'].decode('utf-8')
    else:
        decoded_target = hash_to_hex(target['target'])

    return {
        'target': decoded_target,
        'target_type': target_type
    }


def check_snapshot(expected_snapshot, storage):
    """Check for snapshot match.

    Provide the hashes as hexadecimal, the conversion is done
    within the method.

    Args:
        expected_snapshot (dict): full snapshot with hex ids
        storage (Storage): expected storage

    """
    expected_snapshot_id = expected_snapshot['id']
    expected_branches = expected_snapshot['branches']
    snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id))
    if snap is None:
-        # display known snapshots instead
-        from pprint import pprint
-        for snap_id, (_snap, _) in storage._snapshots.items():
-            snapd = _snap.to_dict()
-            snapd['id'] = hash_to_hex(snapd['id'])
-            branches = {
-                branch.decode('utf-8'): decode_target(target)
-                for branch, target in snapd['branches'].items()
-            }
-            snapd['branches'] = branches
-            pprint(snapd)
+        # display known snapshots instead if possible
+        if hasattr(storage, '_snapshots'):  # in-mem storage
+            from pprint import pprint
+            for snap_id, (_snap, _) in storage._snapshots.items():
+                snapd = _snap.to_dict()
+                snapd['id'] = hash_to_hex(snapd['id'])
+                branches = {
+                    branch.decode('utf-8'): decode_target(target)
+                    for branch, target in snapd['branches'].items()
+                }
+                snapd['branches'] = branches
+                pprint(snapd)
        raise AssertionError('Snapshot is not found')

    branches = {
        branch.decode('utf-8'): decode_target(target)
        for branch, target in snap['branches'].items()
    }
    assert expected_branches == branches


def check_metadata(metadata: Dict, key_path: str, raw_type: str):
    """Given a metadata dict, ensure the associated key_path value is of type
       raw_type.

    Args:
        metadata: Dict to check
        key_path: Path to check
        raw_type: Type to check the path with

    Raises:
        Assertion error in case of mismatch

    """
    data = metadata
    keys = key_path.split('.')
    for k in keys:
        try:
            data = data[k]
        except (TypeError, KeyError) as e:
            # KeyError: because path too long
            # TypeError: data is not a dict
            raise AssertionError(e)
    assert isinstance(data, raw_type)


def check_metadata_paths(metadata: Dict, paths: List[Tuple[str, str]]):
    """Given a metadata dict, ensure the keys are of expected types

    Args:
        metadata: Dict to check
        key_path: Path to check
        raw_type: Type to check the path with

    Raises:
        Assertion error in case of mismatch

    """
    for key_path, raw_type in paths:
        check_metadata(metadata, key_path, raw_type)


def get_stats(storage) -> Dict:
    """Adaptation utils to unify the stats counters across storage
       implementation.
""" storage.refresh_stat_counters() stats = storage.stat_counters() keys = ['content', 'directory', 'origin', 'origin_visit', 'person', 'release', 'revision', 'skipped_content', 'snapshot'] return {k: stats.get(k) for k in keys} diff --git a/swh/loader/package/tests/data/http_deb.debian.org/debian__pool_contrib_c_cicero_cicero_0.7.2-3.diff.gz b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.diff.gz similarity index 100% rename from swh/loader/package/tests/data/http_deb.debian.org/debian__pool_contrib_c_cicero_cicero_0.7.2-3.diff.gz rename to swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.diff.gz diff --git a/swh/loader/package/tests/data/http_deb.debian.org/debian__pool_contrib_c_cicero_cicero_0.7.2-3.dsc b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.dsc similarity index 100% rename from swh/loader/package/tests/data/http_deb.debian.org/debian__pool_contrib_c_cicero_cicero_0.7.2-3.dsc rename to swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.dsc diff --git a/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.diff.gz b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.diff.gz new file mode 100644 index 0000000..71726d2 Binary files /dev/null and b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.diff.gz differ diff --git a/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.dsc b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.dsc new file mode 100644 index 0000000..e5cd3ff --- /dev/null +++ b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.dsc @@ -0,0 +1,43 @@ +-----BEGIN PGP SIGNED MESSAGE----- +Hash: SHA512 + +Format: 1.0 +Source: cicero +Binary: cicero +Architecture: all +Version: 0.7.2-4 +Maintainer: Debian Accessibility Team +Uploaders: Samuel Thibault +Homepage: http://pages.infinit.net/fluxnic/cicero/ +Standards-Version: 4.1.4 +Vcs-Browser: https://anonscm.debian.org/git/pkg-a11y/cicero.git +Vcs-Git: https://anonscm.debian.org/git/pkg-a11y/cicero.git +Build-Depends: debhelper (>= 7) +Package-List: + cicero deb contrib/sound optional arch=all +Checksums-Sha1: + a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43 96527 cicero_0.7.2.orig.tar.gz + d21527f61e4ea81398337e4f20314bd6e72b48e3 4038 cicero_0.7.2-4.diff.gz +Checksums-Sha256: + 63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786 96527 cicero_0.7.2.orig.tar.gz + 2e6fa296ee7005473ff58d0971f4fd325617b445671480e9f2cfb738d5dbcd01 4038 cicero_0.7.2-4.diff.gz +Files: + 4353dede07c5728319ba7f5595a7230a 96527 cicero_0.7.2.orig.tar.gz + 1e7e6fc4a59d57c98082a3af78145734 4038 cicero_0.7.2-4.diff.gz + +-----BEGIN PGP SIGNATURE----- + +iQIzBAEBCgAdFiEEmjc9NmSo3GLaCjT9nlEeAcc38HUFAlrklRgACgkQnlEeAcc3 +8HXJBQ//XE8KG5H+XuJIYHIPv0MRKx3b8A5PUeyW3k2NRo7a70bRuGhe/xVtKr+B +OUSluKAYVcpATHLrJi0PdrC2RQ8E4ck25g8alW+3Dvi5YbMPjeg9dSdBk6kuxWO0 +64L1o4rfuyY5LE+fGVZ0nmSHak9apIJ9SP3Bgg0OodiFaqIurxXwOGI60jcp47Oy +sS+joZUziLY81SIkvx8GkMKzw1PW/k4Jo4L7S34iYMuMG/FcsUeHx1/8DdDoYKui +DihTifecPvBlHDs/7kFqdyASXSF+ilKx0SIUWsdkzzgIhIzO8fZ/vXEwMBaDfxum +uMQXg2KWW0TY/zRqPsgsfXRrCa1nwbxXJNf/YluNYWQ6uWd3KMEjJ71vHSkb3FKG +x1naDozDEeM0Sed1mT9eKqr/tfFl1NG6liJy3b8F8HXJiJrRp1ToGBqtL5VXz53z 
+3ssKcb/UxIhNujT4o7WGgiP+jWj/CIcYdsJ9keM4mA2FBzeaObz1scWi539JrdRA
+oqnj7xVfWGAZAg0Ozce+7rKPQqACMB9vMzHJ0NpZZvIYUHSz9AwGvVoQKYc0CE/r
+qFjVRcASNKHAAg+l4wv9n+zOdFUTeOa3hTxCHQAhSXnCWfw4zfsJQK0Ntbrcg94V
+8IfnmEtJXEJwQWmEn17zEerEDEbc5+KqLApqwnYol5rulR1VNi4=
+=VcXs
+-----END PGP SIGNATURE-----
diff --git a/swh/loader/package/tests/data/http_deb.debian.org/debian__pool_contrib_c_cicero_cicero_0.7.2.orig.tar.gz b/swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2.orig.tar.gz
similarity index 100%
rename from swh/loader/package/tests/data/http_deb.debian.org/debian__pool_contrib_c_cicero_cicero_0.7.2.orig.tar.gz
rename to swh/loader/package/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2.orig.tar.gz
diff --git a/swh/loader/package/tests/test_debian.py b/swh/loader/package/tests/test_debian.py
index 9475b13..2b632be 100644
--- a/swh/loader/package/tests/test_debian.py
+++ b/swh/loader/package/tests/test_debian.py
@@ -1,308 +1,371 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import copy
import logging
import pytest

from os import path

from swh.loader.package.debian import (
    DebianLoader, download_package, dsc_information, uid_to_person,
    prepare_person, get_package_metadata, extract_package
)

from swh.loader.package.tests.common import check_snapshot, get_stats

logger = logging.getLogger(__name__)

PACKAGE_FILES = {
    'name': 'cicero',
    'version': '0.7.2-3',
    'files': {
        'cicero_0.7.2-3.diff.gz': {
            'md5sum': 'a93661b6a48db48d59ba7d26796fc9ce',
            'name': 'cicero_0.7.2-3.diff.gz',
            'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c',  # noqa
            'size': 3964,
-            'uri': 'http://deb.debian.org/debian//pool/contrib/c/cicero/cicero_0.7.2-3.diff.gz'  # noqa
+            'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.diff.gz'  # noqa
        },
        'cicero_0.7.2-3.dsc': {
            'md5sum': 'd5dac83eb9cfc9bb52a15eb618b4670a',
            'name': 'cicero_0.7.2-3.dsc',
            'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03',  # noqa
            'size': 1864,
-            'uri': 'http://deb.debian.org/debian//pool/contrib/c/cicero/cicero_0.7.2-3.dsc'},  # noqa
+            'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc'},  # noqa
        'cicero_0.7.2.orig.tar.gz': {
            'md5sum': '4353dede07c5728319ba7f5595a7230a',
            'name': 'cicero_0.7.2.orig.tar.gz',
            'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786',  # noqa
            'size': 96527,
-            'uri': 'http://deb.debian.org/debian//pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz'  # noqa
+            'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz'  # noqa
        }
    },
}

+PACKAGE_FILES2 = {
+    'name': 'cicero',
+    'version': '0.7.2-4',
+    'files': {
+        'cicero_0.7.2-4.diff.gz': {
+            'md5sum': '1e7e6fc4a59d57c98082a3af78145734',
+            'name': 'cicero_0.7.2-4.diff.gz',
+            'sha256': '2e6fa296ee7005473ff58d0971f4fd325617b445671480e9f2cfb738d5dbcd01',  # noqa
+            'size': 4038,
+            'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-4.diff.gz'  # noqa
+        },
+        'cicero_0.7.2-4.dsc': {
+            'md5sum': '1a6c8855a73b4282bb31d15518f18cde',
+            'name': 'cicero_0.7.2-4.dsc',
+            'sha256': '913ee52f7093913420de5cbe95d63cfa817f1a1daf997961149501894e754f8b',  # noqa
+            'size': 1881,
+            'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-4.dsc'},  # noqa
+        'cicero_0.7.2.orig.tar.gz': {
+            'md5sum': '4353dede07c5728319ba7f5595a7230a',
+            'name': 'cicero_0.7.2.orig.tar.gz',
+            'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786',  # noqa
+            'size': 96527,
+            'uri': 'http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz'  # noqa
+        }
+    }
+}
+
+
PACKAGE_PER_VERSION = {
-    'stretch/contrib/0.7.2-3': PACKAGE_FILES
+    'stretch/contrib/0.7.2-3': PACKAGE_FILES,
+}
+
+
+PACKAGES_PER_VERSION = {
+    'stretch/contrib/0.7.2-3': PACKAGE_FILES,
+    'buster/contrib/0.7.2-4': PACKAGE_FILES2,
}


def test_debian_first_visit(
        swh_config, requests_mock_datadir):
    """With no prior visit, load a gnu project ends up with 1 snapshot

    """
    loader = DebianLoader(
        url='deb://Debian/packages/cicero',
        date='2019-10-12T05:58:09.165557+00:00',
        packages=PACKAGE_PER_VERSION)

    actual_load_status = loader.load()
    assert actual_load_status['status'] == 'eventful'

    stats = get_stats(loader.storage)
    assert {
        'content': 42,
        'directory': 2,
        'origin': 1,
        'origin_visit': 1,
        'person': 1,
        'release': 0,
        'revision': 1,  # all artifacts under 1 revision
        'skipped_content': 0,
        'snapshot': 1
    } == stats

    expected_snapshot = {
        'id': '3b6b66e6ee4e7d903a379a882684a2a50480c0b4',
        'branches': {
            'releases/stretch/contrib/0.7.2-3': {
                'target_type': 'revision',
                'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07',
            }
        },
    }  # different than the previous loader as no release is done

    check_snapshot(expected_snapshot, loader.storage)


def test_debian_first_visit_then_another_visit(
        swh_config, requests_mock_datadir):
    """With no prior visit, load a gnu project ends up with 1 snapshot

    """
    url = 'deb://Debian/packages/cicero'
    loader = DebianLoader(
        url=url,
        date='2019-10-12T05:58:09.165557+00:00',
        packages=PACKAGE_PER_VERSION)

    actual_load_status = loader.load()
    assert actual_load_status['status'] == 'eventful'

    origin_visit = next(loader.storage.origin_visit_get(url))
    assert origin_visit['status'] == 'full'

    stats = get_stats(loader.storage)
    assert {
        'content': 42,
        'directory': 2,
        'origin': 1,
        'origin_visit': 1,
        'person': 1,
        'release': 0,
        'revision': 1,  # all artifacts under 1 revision
        'skipped_content': 0,
        'snapshot': 1
    } == stats

    expected_snapshot = {
        'id': '3b6b66e6ee4e7d903a379a882684a2a50480c0b4',
        'branches': {
            'releases/stretch/contrib/0.7.2-3': {
                'target_type': 'revision',
                'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07',
            }
        },
    }  # different than the previous loader as no release is done

    check_snapshot(expected_snapshot, loader.storage)

    # No change in between load
    actual_load_status2 = loader.load()
    assert actual_load_status2['status'] == 'uneventful'

    origin_visit2 = list(loader.storage.origin_visit_get(url))
    assert origin_visit2[-1]['status'] == 'full'

    stats2 = get_stats(loader.storage)
    assert {
        'content': 42 + 0,
        'directory': 2 + 0,
        'origin': 1,
        'origin_visit': 1 + 1,  # a new visit occurred
        'person': 1,
        'release': 0,
        'revision': 1,
        'skipped_content': 0,
        'snapshot': 1,  # same snapshot across 2 visits
    } == stats2

    urls = [
        m.url for m in requests_mock_datadir.request_history
        if m.url.startswith('http://deb.debian.org')
    ]
    # visited each package artifact twice across 2 visits
    assert len(urls) == len(set(urls))


def test_uid_to_person():
    uid = 'Someone Name <someone@orga.org>'
    actual_person = uid_to_person(uid)

    assert actual_person == {
        'name': 'Someone Name',
        'email': 'someone@orga.org',
        'fullname': uid,
    }


def test_prepare_person():
    actual_author = prepare_person({
        'name': 'Someone Name',
        'email': 'someone@orga.org',
        'fullname': 'Someone Name <someone@orga.org>',
    })

    assert actual_author == {
        'name': b'Someone Name',
        'email': b'someone@orga.org',
        'fullname': b'Someone Name <someone@orga.org>',
    }


def test_download_package(datadir, tmpdir, requests_mock_datadir):
    tmpdir = str(tmpdir)  # py3.5 work around (LocalPath issue)
    all_hashes = download_package(PACKAGE_FILES, tmpdir)
    assert all_hashes == {
        'cicero_0.7.2-3.diff.gz': {
            'checksums': {
                'blake2s256': '08b1c438e70d2474bab843d826515147fa4a817f8c4baaf3ddfbeb5132183f21',  # noqa
                'sha1': '0815282053f21601b0ec4adf7a8fe47eace3c0bc',
                'sha1_git': '834ac91da3a9da8f23f47004bb456dd5bd16fe49',
                'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c'  # noqa
            },
            'filename': 'cicero_0.7.2-3.diff.gz',
            'length': 3964},
        'cicero_0.7.2-3.dsc': {
            'checksums': {
                'blake2s256': '8c002bead3e35818eaa9d00826f3d141345707c58fb073beaa8abecf4bde45d2',  # noqa
                'sha1': 'abbec4e8efbbc80278236e1dd136831eac08accd',
                'sha1_git': '1f94b2086fa1142c2df6b94092f5c5fa11093a8e',
                'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03'  # noqa
            },
            'filename': 'cicero_0.7.2-3.dsc',
            'length': 1864},
        'cicero_0.7.2.orig.tar.gz': {
            'checksums': {
                'blake2s256': '9809aa8d2e2dad7f34cef72883db42b0456ab7c8f1418a636eebd30ab71a15a6',  # noqa
                'sha1': 'a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43',
                'sha1_git': 'aa0a38978dce86d531b5b0299b4a616b95c64c74',
                'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786'  # noqa
            },
            'filename': 'cicero_0.7.2.orig.tar.gz',
            'length': 96527
        }
    }


def test_dsc_information_ok():
    fname = 'cicero_0.7.2-3.dsc'
    dsc_url, dsc_name = dsc_information(PACKAGE_FILES)

    assert dsc_url == PACKAGE_FILES['files'][fname]['uri']
    assert dsc_name == PACKAGE_FILES['files'][fname]['name']


def test_dsc_information_not_found():
    fname = 'cicero_0.7.2-3.dsc'
    package_files = copy.deepcopy(PACKAGE_FILES)
    package_files['files'].pop(fname)

    dsc_url, dsc_name = dsc_information(package_files)

    assert dsc_url is None
    assert dsc_name is None


def test_dsc_information_too_many_dsc_entries():
    # craft an extra dsc file
    fname = 'cicero_0.7.2-3.dsc'
    package_files = copy.deepcopy(PACKAGE_FILES)
    data = package_files['files'][fname]
    fname2 = fname.replace('cicero', 'ciceroo')
    package_files['files'][fname2] = data

    with pytest.raises(
            ValueError, match='Package %s_%s references several dsc' % (
                package_files['name'], package_files['version'])):
        dsc_information(package_files)


def test_get_package_metadata(requests_mock_datadir, datadir, tmp_path):
    tmp_path = str(tmp_path)  # py3.5 compat.
    package = PACKAGE_FILES

    logger.debug('package: %s', package)

    # download the packages
    all_hashes = download_package(package, tmp_path)

    # Retrieve information from package
    _, dsc_name = dsc_information(package)

    dl_artifacts = [(tmp_path, hashes) for hashes in all_hashes.values()]

    # Extract information from package
    extracted_path = extract_package(dl_artifacts, tmp_path)

    # Retrieve information on package
    dsc_path = path.join(path.dirname(extracted_path), dsc_name)
    actual_package_info = get_package_metadata(
        package, dsc_path, extracted_path)

    logger.debug('actual_package_info: %s', actual_package_info)

    assert actual_package_info == {
        'changelog': {
            'date': '2014-10-19T16:52:35+02:00',
            'history': [
                ('cicero', '0.7.2-2'),
                ('cicero', '0.7.2-1'),
                ('cicero', '0.7-1')
            ],
            'person': {
                'email': 'sthibault@debian.org',
                'fullname': 'Samuel Thibault <sthibault@debian.org>',
                'name': 'Samuel Thibault'
            }
        },
        'maintainers': [
            {
                'email': 'debian-accessibility@lists.debian.org',
                'fullname': 'Debian Accessibility Team '
                            '<debian-accessibility@lists.debian.org>',
                'name': 'Debian Accessibility Team'
            },
            {
                'email': 'sthibault@debian.org',
                'fullname': 'Samuel Thibault <sthibault@debian.org>',
                'name': 'Samuel Thibault'
            }
        ],
        'name': 'cicero',
        'version': '0.7.2-3'
    }
+
+
+def test_debian_multiple_packages(swh_config, requests_mock_datadir):
+    url = 'deb://Debian/packages/cicero'
+    loader = DebianLoader(
+        url=url,
+        date='2019-10-12T05:58:09.165557+00:00',
+        packages=PACKAGES_PER_VERSION)
+
+    actual_load_status = loader.load()
+    assert actual_load_status['status'] == 'eventful'
+
+    origin_visit = next(loader.storage.origin_visit_get(url))
+    assert origin_visit['status'] == 'full'
+
+    expected_snapshot = {
+        'id': 'defc19021187f3727293121fcf6c5c82cb923604',
+        'branches': {
+            'releases/stretch/contrib/0.7.2-3': {
+                'target_type': 'revision',
+                'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07',
+            },
+            'releases/buster/contrib/0.7.2-4': {
+                'target_type': 'revision',
+                'target': '8224139c274c984147ef4b09aa0e462c55a10bd3',
+            }
+        },
+    }
+
+    check_snapshot(expected_snapshot, loader.storage)
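
For reference, a minimal sketch of how the check_metadata_paths helper from swh/loader/package/tests/common.py (included above) can be exercised in a package-loader test. The metadata layout and the checked paths below are illustrative assumptions, not values taken from this diff:

from swh.loader.package.tests.common import check_metadata_paths


def test_metadata_shape_sketch():
    # Hypothetical metadata layout, chosen only to illustrate dotted key paths.
    metadata = {
        'intrinsic': {'tool': 'dsc', 'raw': {'name': 'cicero'}},
        'original_artifact': [{'filename': 'cicero_0.7.2.orig.tar.gz'}],
    }
    # Each (key_path, type) pair is resolved by check_metadata(), which walks
    # the dotted path and asserts isinstance() on the final value.
    check_metadata_paths(metadata, paths=[
        ('intrinsic.tool', str),
        ('intrinsic.raw', dict),
        ('original_artifact', list),
    ])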