Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/tar/tests/test_loader.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import pytest | import pytest | ||||
from swh.loader.core.tests import BaseLoaderTest, LoaderNoStorage | from swh.model import hashutil | ||||
from swh.loader.tar.loader import TarLoader | |||||
class TarLoaderNoStorage(LoaderNoStorage, TarLoader): | |||||
"""A DirLoader with no persistence. | |||||
Context: | |||||
Load a tarball with a persistent-less tarball loader | |||||
""" | from swh.loader.core.tests import BaseLoaderTest | ||||
def __init__(self, config={}): | from swh.loader.tar.loader import TarLoader | ||||
super().__init__(config=config) | |||||
self.origin_id = 1 | |||||
self.visit = 1 | |||||
TEST_CONFIG = { | TEST_CONFIG = { | ||||
'extraction_dir': '/tmp/tests/loader-tar/', # where to extract the tarball | 'extraction_dir': '/tmp/tests/loader-tar/', # where to extract the tarball | ||||
'storage': { # we instantiate it but we don't use it in test context | 'storage': { # we instantiate it but we don't use it in test context | ||||
'cls': 'remote', | 'cls': 'memory', | ||||
'args': { | 'args': { | ||||
'url': 'http://127.0.0.1:9999', # somewhere that does not exist | |||||
} | } | ||||
}, | }, | ||||
'send_contents': False, | 'send_contents': True, | ||||
'send_directories': False, | 'send_directories': True, | ||||
'send_revisions': False, | 'send_revisions': True, | ||||
'send_releases': False, | 'send_releases': True, | ||||
'send_snapshot': False, | 'send_snapshot': True, | ||||
'content_packet_size': 100, | 'content_packet_size': 100, | ||||
'content_packet_block_size_bytes': 104857600, | 'content_packet_block_size_bytes': 104857600, | ||||
'content_packet_size_bytes': 1073741824, | 'content_packet_size_bytes': 1073741824, | ||||
'directory_packet_size': 250, | 'directory_packet_size': 250, | ||||
'revision_packet_size': 100, | 'revision_packet_size': 100, | ||||
'release_packet_size': 100, | 'release_packet_size': 100, | ||||
'content_size_limit': 1000000000 | |||||
} | } | ||||
def parse_config_file(base_filename=None, config_filename=None, | class TestTarLoader(TarLoader): | ||||
additional_configs=None, global_config=True): | def parse_config_file(self, *args, **kwargs): | ||||
return TEST_CONFIG | return TEST_CONFIG | ||||
# Inhibit side-effect loading configuration from disk | |||||
TarLoader.parse_config_file = parse_config_file | |||||
class TarLoaderTest(BaseLoaderTest): | class TarLoaderTest(BaseLoaderTest): | ||||
"""Prepare the archive to load | """Prepare the archive to load | ||||
""" | """ | ||||
def setUp(self): | def setUp(self): | ||||
super().setUp('sample-folder.tgz', | super().setUp('sample-folder.tgz', | ||||
start_path=os.path.dirname(__file__), | start_path=os.path.dirname(__file__), | ||||
uncompress_archive=False) | uncompress_archive=False) | ||||
self.tarpath = self.destination_path | self.tarpath = self.destination_path | ||||
class TarLoaderTest1(TarLoaderTest): | class TarLoaderTest1(TarLoaderTest): | ||||
def setUp(self): | def setUp(self): | ||||
super().setUp() | super().setUp() | ||||
self.loader = TarLoaderNoStorage() | self.loader = TestTarLoader() | ||||
self.storage = self.loader.storage | |||||
@pytest.mark.fs | @pytest.mark.fs | ||||
def test_load(self): | def test_load(self): | ||||
"""Process a new tarball should be ok | """Process a new tarball should be ok | ||||
""" | """ | ||||
# given | # given | ||||
origin = { | origin = { | ||||
'url': 'file:///tmp/sample-folder', | 'url': 'file:///tmp/sample-folder', | ||||
'type': 'dir' | 'type': 'dir' | ||||
} | } | ||||
visit_date = 'Tue, 3 May 2016 17:16:32 +0200' | visit_date = 'Tue, 3 May 2016 17:16:32 +0200' | ||||
import datetime | import datetime | ||||
commit_time = int(datetime.datetime.now( | commit_time = int(datetime.datetime( | ||||
tz=datetime.timezone.utc).timestamp() | 2018, 12, 5, 13, 35, 23, 0, | ||||
) | tzinfo=datetime.timezone(datetime.timedelta(hours=1)) | ||||
).timestamp()) | |||||
swh_person = { | swh_person = { | ||||
'name': 'Software Heritage', | 'name': 'Software Heritage', | ||||
'fullname': 'Software Heritage', | 'fullname': 'Software Heritage', | ||||
'email': 'robot@softwareheritage.org' | 'email': 'robot@softwareheritage.org' | ||||
} | } | ||||
revision_message = 'swh-loader-tar: synthetic revision message' | revision_message = 'swh-loader-tar: synthetic revision message' | ||||
Show All 15 Lines | def test_load(self): | ||||
} | } | ||||
branch_name = os.path.basename(self.tarpath) | branch_name = os.path.basename(self.tarpath) | ||||
# when | # when | ||||
self.loader.load(tar_path=self.tarpath, origin=origin, | self.loader.load(tar_path=self.tarpath, origin=origin, | ||||
visit_date=visit_date, revision=revision, | visit_date=visit_date, revision=revision, | ||||
branch_name=branch_name) | branch_name=branch_name) | ||||
self.loader.flush() | |||||
ardumont: why do you have to flush explicitly?
The loader-core's current behavior is supposed to do it. | |||||
vlorentz (author, marked as done): I don't. I was trying stuff and forgot to clean this up. | |||||
# then | # then | ||||
self.assertCountContents(8, "3 files + 5 links") | self.assertCountContents(8, "3 files + 5 links") | ||||
self.assertCountDirectories(6, "4 subdirs + 1 empty + 1 main dir") | self.assertCountDirectories(6, "4 subdirs + 1 empty + 1 main dir") | ||||
self.assertCountRevisions(1, "synthetic revision") | self.assertCountRevisions(1, "synthetic revision") | ||||
actual_revision = self.state('revision')[0] | rev_id = hashutil.hash_to_bytes( | ||||
'67a7d7dda748f9a86b56a13d9218d16f5cc9ab3d') | |||||
actual_revision = next(self.storage.revision_get([rev_id])) | |||||
self.assertTrue(actual_revision['synthetic']) | self.assertTrue(actual_revision['synthetic']) | ||||
self.assertEqual(actual_revision['parents'], []) | self.assertEqual(actual_revision['parents'], []) | ||||
self.assertEqual(actual_revision['type'], 'tar') | self.assertEqual(actual_revision['type'], 'tar') | ||||
self.assertEqual(actual_revision['message'], | self.assertEqual(actual_revision['message'], | ||||
b'swh-loader-tar: synthetic revision message') | b'swh-loader-tar: synthetic revision message') | ||||
self.assertEqual(actual_revision['directory'], | self.assertEqual(actual_revision['directory'], | ||||
b'\xa7A\xfcM\x96\x8c{\x8e<\x94\xff\x86\xe7\x04\x80\xc5\xc7\xe5r\xa9') # noqa | b'\xa7A\xfcM\x96\x8c{\x8e<\x94\xff\x86\xe7\x04\x80\xc5\xc7\xe5r\xa9') # noqa | ||||
Show All 14 Lines |
why do you have to flush explicitly?
The loader-core's current behavior is supposed to do it.
(For now, loader-tar -inherits-> loader-dir -inherits-> loader-core; I say "for now" because I'd like to simplify this inheritance chain but never had a chance to ;)