diff --git a/PKG-INFO b/PKG-INFO index 4504fa9..b191d4d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.objstorage -Version: 0.0.21 +Version: 0.0.22 Summary: Software Heritage Object Storage Home-page: https://forge.softwareheritage.org/diffusion/DOBJS Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index 54ebfa0..af997da 100644 --- a/debian/control +++ b/debian/control @@ -1,38 +1,32 @@ Source: swh-objstorage Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), + python3-aiohttp (>= 2.1.0), python3-all, python3-flask, python3-nose, python3-setuptools, python3-swh.core (>= 0.0.28~), python3-swh.model (>= 0.0.14~), - python3-swh.storage.archiver (>= 0.0.52~), python3-click, python3-libcloud, python3-azure-storage, python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DOBJS/ Package: python3-swh.objstorage Architecture: all -Depends: python3-swh.core (>= 0.0.28~), ${misc:Depends}, ${python3:Depends} +Depends: python3-swh.core (>= 0.0.28~), ${misc:Depends}, ${python3:Depends}, python3-aiohttp (>= 2.1.0) Description: Software Heritage Object Storage -Package: python3-swh.objstorage.checker -Architecture: all -Depends: python3-swh.objstorage (= ${binary:Version}), python3-swh.storage.archiver (>= 0.0.52~), - ${misc:Depends}, ${python3:Depends} -Description: Software Heritage Object Storage Checker - Package: python3-swh.objstorage.cloud Architecture: all Depends: python3-swh.objstorage (= ${binary:Version}), python3-libcloud, python3-azure-storage, ${misc:Depends}, ${python3:Depends} Breaks: python3-swh.objstorage (<= 0.0.7~) Description: Software Heritage Cloud Object Storage diff --git a/debian/rules b/debian/rules index c7c7d00..b86925b 100755 --- a/debian/rules +++ b/debian/rules @@ -1,22 +1,20 @@ #!/usr/bin/make -f export PYBUILD_NAME=swh.objstorage %: dh $@ --with python3 --buildsystem=pybuild override_dh_install: dh_install for pyvers in $(shell py3versions -vr); do \ - mkdir -p $(CURDIR)/debian/python3-swh.objstorage.checker/usr/lib/python$$pyvers/dist-packages/swh/objstorage/ ; \ - mv $(CURDIR)/debian/python3-swh.objstorage/usr/lib/python$$pyvers/dist-packages/swh/objstorage/checker.py \ - $(CURDIR)/debian/python3-swh.objstorage.checker/usr/lib/python$$pyvers/dist-packages/swh/objstorage/ ; \ mkdir -p $(CURDIR)/debian/python3-swh.objstorage.cloud/usr/lib/python$$pyvers/dist-packages/swh/objstorage/cloud ; \ mv $(CURDIR)/debian/python3-swh.objstorage/usr/lib/python$$pyvers/dist-packages/swh/objstorage/cloud/* \ $(CURDIR)/debian/python3-swh.objstorage.cloud/usr/lib/python$$pyvers/dist-packages/swh/objstorage/cloud/ ; \ done override_dh_auto_test: PYBUILD_SYSTEM=custom \ PYBUILD_TEST_ARGS="cd {build_dir}; python{version} -m nose swh -sva '!db'" \ + no_proxy=127.0.0.1 \ dh_auto_test diff --git a/requirements-swh.txt b/requirements-swh.txt index cdf0801..464723a 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,3 +1,2 @@ swh.core >= 0.0.28 swh.model >= 0.0.14 -swh.storage.archiver >= 0.0.52 diff --git a/requirements.txt b/requirements.txt index 38490a9..c156824 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html vcversioner # remote storage API server -aiohttp +aiohttp >= 2.1.0 click # optional dependencies # apache-libcloud # azure-storage diff --git a/swh.objstorage.egg-info/PKG-INFO b/swh.objstorage.egg-info/PKG-INFO index 4504fa9..b191d4d 100644 --- a/swh.objstorage.egg-info/PKG-INFO +++ b/swh.objstorage.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.objstorage -Version: 0.0.21 +Version: 0.0.22 Summary: Software Heritage Object Storage Home-page: https://forge.softwareheritage.org/diffusion/DOBJS Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.objstorage.egg-info/SOURCES.txt b/swh.objstorage.egg-info/SOURCES.txt index 94a10da..3f410e5 100644 --- a/swh.objstorage.egg-info/SOURCES.txt +++ b/swh.objstorage.egg-info/SOURCES.txt @@ -1,50 +1,48 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile requirements-swh.txt requirements.txt setup.py version.txt bin/swh-objstorage-add-dir bin/swh-objstorage-azure bin/swh-objstorage-fsck debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format swh.objstorage.egg-info/PKG-INFO swh.objstorage.egg-info/SOURCES.txt swh.objstorage.egg-info/dependency_links.txt swh.objstorage.egg-info/requires.txt swh.objstorage.egg-info/top_level.txt swh/objstorage/__init__.py -swh/objstorage/checker.py swh/objstorage/exc.py swh/objstorage/objstorage.py swh/objstorage/objstorage_pathslicing.py swh/objstorage/api/__init__.py swh/objstorage/api/client.py swh/objstorage/api/server.py swh/objstorage/cloud/__init__.py swh/objstorage/cloud/objstorage_azure.py swh/objstorage/cloud/objstorage_cloud.py swh/objstorage/multiplexer/__init__.py swh/objstorage/multiplexer/multiplexer_objstorage.py swh/objstorage/multiplexer/filter/__init__.py swh/objstorage/multiplexer/filter/filter.py swh/objstorage/multiplexer/filter/id_filter.py swh/objstorage/multiplexer/filter/read_write_filter.py swh/objstorage/tests/objstorage_testing.py swh/objstorage/tests/server_testing.py -swh/objstorage/tests/test_checker.py swh/objstorage/tests/test_multiplexer_filter.py swh/objstorage/tests/test_objstorage_api.py swh/objstorage/tests/test_objstorage_azure.py swh/objstorage/tests/test_objstorage_cloud.py swh/objstorage/tests/test_objstorage_instantiation.py swh/objstorage/tests/test_objstorage_multiplexer.py swh/objstorage/tests/test_objstorage_pathslicing.py \ No newline at end of file diff --git a/swh.objstorage.egg-info/requires.txt b/swh.objstorage.egg-info/requires.txt index 31fea90..f48fb1f 100644 --- a/swh.objstorage.egg-info/requires.txt +++ b/swh.objstorage.egg-info/requires.txt @@ -1,6 +1,5 @@ -aiohttp +aiohttp>=2.1.0 click swh.core>=0.0.28 swh.model>=0.0.14 -swh.storage.archiver>=0.0.52 vcversioner diff --git a/swh/objstorage/checker.py b/swh/objstorage/checker.py deleted file mode 100644 index 08dd81c..0000000 --- a/swh/objstorage/checker.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright (C) 2015-2016 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import abc -import click -import logging - -from swh.core import config -from swh.storage.archiver.storage import ArchiverStorage - -from swh.objstorage import get_objstorage -from swh.objstorage.exc import ObjNotFoundError, Error - - -class BaseContentChecker(config.SWHConfig, metaclass=abc.ABCMeta): - """Abstract class of the content integrity checker. - - This checker's purpose is to iterate over the contents of a storage and - check the integrity of each file. - Behavior of the checker to deal with corrupted status will be specified - by subclasses. - - You should override the DEFAULT_CONFIG and CONFIG_BASE_FILENAME - variables if you need it. - - """ - DEFAULT_CONFIG = { - 'storage': ('dict', { - 'cls': 'pathslicing', - 'args': { - 'root': '/srv/softwareheritage/objects', - 'slicing': '0:2/2:4/4:6' - } - }), - 'batch_size': ('int', 1000), - } - - CONFIG_BASE_FILENAME = 'objstorage/objstorage_checker' - - def __init__(self): - """ Create a checker that ensure the objstorage have no corrupted file - """ - self.config = self.parse_config_file() - self.objstorage = get_objstorage(**self.config['storage']) - self.batch_size = self.config['batch_size'] - - def run_as_daemon(self): - """ Start the check routine and perform it forever. - - Use this method to run the checker as a daemon that will iterate over - the content forever in background. - """ - while True: - try: - self.run() - except: - pass - - def run(self): - """ Check a batch of content. - """ - for obj_id in self._get_content_to_check(self.batch_size): - cstatus = self._check_content(obj_id) - if cstatus == 'corrupted': - self.corrupted_content(obj_id) - elif cstatus == 'missing': - self.missing_content(obj_id) - - def _get_content_to_check(self, batch_size): - """ Get the content that should be verified. - - Returns: - An iterable of the content's id that need to be checked. - """ - yield from self.objstorage.get_random(batch_size) - - def _check_content(self, obj_id): - """ Check the validity of the given content. - - Returns: - True if the content was valid, false if it was corrupted. - """ - try: - self.objstorage.check(obj_id) - except ObjNotFoundError: - return 'missing' - except Error: - return 'corrupted' - - @abc.abstractmethod - def corrupted_content(self, obj_id): - """ Perform an action to treat with a corrupted content. - """ - raise NotImplementedError("%s must implement " - "'corrupted_content' method" % type(self)) - - @abc.abstractmethod - def missing_content(self, obj_id): - """ Perform an action to treat with a missing content. - """ - raise NotImplementedError("%s must implement " - "'missing_content' method" % type(self)) - - -class LogContentChecker(BaseContentChecker): - """ Content integrity checker that just log detected errors. - """ - - DEFAULT_CONFIG = { - 'storage': ('dict', { - 'cls': 'pathslicing', - 'args': { - 'root': '/srv/softwareheritage/objects', - 'slicing': '0:2/2:4/4:6' - } - }), - 'batch_size': ('int', 1000), - 'log_tag': ('str', 'objstorage.checker') - } - - CONFIG_BASE_FILENAME = 'objstorage/log_checker' - - def __init__(self): - super().__init__() - self.logger = logging.getLogger(self.config['log_tag']) - - def corrupted_content(self, obj_id): - """ Perform an action to treat with a corrupted content. - """ - self.logger.error('Content %s is corrupted' % obj_id) - - def missing_content(self, obj_id): - """ Perform an action to treat with a missing content. - """ - self.logger.error('Content %s is detected missing' % obj_id) - - -class RepairContentChecker(LogContentChecker): - """ Content integrity checker that will try to restore contents. - """ - - DEFAULT_CONFIG = { - 'storage': ('dict', { - 'cls': 'pathslicing', - 'args': { - 'root': '/srv/softwareheritage/objects', - 'slicing': '0:2/2:4/4:6' - } - }), - 'batch_size': ('int', 1000), - 'log_tag': ('str', 'objstorage.checker'), - 'backup_storages': ('dict', { - 'banco': { - 'cls': 'remote', - 'args': {'url': 'http://banco:5003/'} - } - }) - } - - CONFIG_BASE_FILENAME = 'objstorage/repair_checker' - - def __init__(self): - super().__init__() - self.backups = [ - get_objstorage(**storage) - for name, storage in self.config['backup_storages'].items() - ] - - def corrupted_content(self, obj_id): - """ Perform an action to treat with a corrupted content. - """ - super().corrupted_content(obj_id) - self._restore(obj_id) - - def missing_content(self, obj_id): - """ Perform an action to treat with a missing content. - """ - super().missing_content(obj_id) - self._restore(obj_id) - - def _restore(self, obj_id): - if not self._perform_restore(obj_id): - # Object could not be restored - self.logger.critical( - 'Object %s is corrupted and could not be repaired' % obj_id - ) - - def _perform_restore(self, obj_id): - """ Try to restore the object in the current storage using the backups - """ - for backup in self.backups: - try: - content = backup.get(obj_id) - self.objstorage.restore(content, obj_id) - except ObjNotFoundError as e: - continue - else: - # Return True direclty when a backup contains the object - return True - # No backup contains the object - return False - - -class ArchiveNotifierContentChecker(LogContentChecker): - """ Implementation of the checker that will update the archiver database - - Once the database is updated the archiver may restore the content on it's - next scheduling as it won't be present anymore, and this status change - will probably make the retention policy invalid. - """ - DEFAULT_CONFIG = { - 'storage': ('dict', { - 'cls': 'pathslicing', - 'args': { - 'root': '/srv/softwareheritage/objects', - 'slicing': '0:2/2:4/4:6' - } - }), - 'batch_size': ('int', 1000), - 'log_tag': ('str', 'objstorage.checker'), - 'storage_name': ('str', 'banco'), - 'dbconn': ('str', 'dbname=softwareheritage-archiver-dev') - } - - CONFIG_BASE_FILENAME = 'objstorage/archive_notifier_checker' - - def __init__(self): - super().__init__() - self.archiver_db = ArchiverStorage(self.config['dbconn']) - self.storage_name = self.config['storage_name'] - - def corrupted_content(self, obj_id): - """ Perform an action to treat with a corrupted content. - """ - super().corrupted_content(obj_id) - self._update_status(obj_id, 'corrupted') - - def missing_content(self, obj_id): - """ Perform an action to treat with a missing content. - """ - super().missing_content(obj_id) - self._update_status(obj_id, 'missing') - - def _update_status(self, obj_id, status): - self.archiver_db.content_archive_update(obj_id, self.storage_name, - new_status=status) - - -@click.command() -@click.argument('checker-type', required=1, default='log') -@click.option('--daemon/--nodaemon', default=True, - help='Indicates if the checker should run forever ' - 'or on a single batch of content') -def launch(checker_type, daemon): - types = { - 'log': LogContentChecker, - 'repair': RepairContentChecker, - 'archiver_notifier': ArchiveNotifierContentChecker - } - checker = types[checker_type]() - if daemon: - checker.run_as_daemon() - else: - checker.run() - - -if __name__ == '__main__': - launch() diff --git a/swh/objstorage/tests/test_checker.py b/swh/objstorage/tests/test_checker.py deleted file mode 100644 index 57f75cb..0000000 --- a/swh/objstorage/tests/test_checker.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (C) 2015-2017 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import gzip -import tempfile -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.objstorage.exc import ObjNotFoundError -from swh.objstorage.checker import RepairContentChecker -from swh.model import hashutil - - -class MockBackupObjStorage(): - - def __init__(self): - self.values = {} - - def add(self, value, obj_id): - self.values[obj_id] = value - - def get(self, obj_id): - try: - return self.values[obj_id] - except KeyError: - raise ObjNotFoundError(obj_id) - - -@attr('fs') -class TestRepairChecker(unittest.TestCase): - """ Test the content integrity checker - """ - - def setUp(self): - super().setUp() - self._alter_config() - self.checker = RepairContentChecker() - self.checker.backups = [MockBackupObjStorage(), - MockBackupObjStorage()] - - def _alter_config(self): - RepairContentChecker.parse_config_file = ( - lambda cls: { - 'storage': {'cls': 'pathslicing', - 'args': {'root': tempfile.mkdtemp(), - 'slicing': '0:2/2:4/4:6'}}, - 'batch_size': 1000, - 'log_tag': 'objstorage_test', - 'backup_storages': {} - } - ) - - def _corrupt_content(self, obj_id): - """ Make the given content invalid. - """ - hex_obj_id = hashutil.hash_to_hex(obj_id) - file_path = self.checker.objstorage._obj_path(hex_obj_id) - with gzip.open(file_path, 'wb') as f: - f.write(b'Unexpected content') - - def _is_corrupted(self, obj_id): - """ Ensure the given object is corrupted - """ - return self.checker._check_content(obj_id) == 'corrupted' - - def _is_missing(self, obj_id): - """ Ensure the given object is missing - """ - return self.checker._check_content(obj_id) == 'missing' - - @istest - def check_valid_content(self): - # Check that a valid content is valid. - content = b'check_valid_content' - obj_id = self.checker.objstorage.add(content) - self.assertFalse(self._is_corrupted(obj_id)) - self.assertFalse(self._is_missing(obj_id)) - - @istest - def check_corrupted_content(self): - # Check that an invalid content is noticed. - content = b'check_corrupted_content' - obj_id = self.checker.objstorage.add(content) - self._corrupt_content(obj_id) - self.assertTrue(self._is_corrupted(obj_id)) - self.assertFalse(self._is_missing(obj_id)) - - @istest - def check_missing_content(self): - obj_id = hashutil.hash_data(b'check_missing_content')['sha1'] - self.assertFalse(self._is_corrupted(obj_id)) - self.assertTrue(self._is_missing(obj_id)) - - @istest - def repair_content_present_first(self): - # Try to repair a content that is in the backup storage. - content = b'repair_content_present_first' - obj_id = self.checker.objstorage.add(content) - # Add a content to the mock - self.checker.backups[0].add(content, obj_id) - # Corrupt and repair it. - self._corrupt_content(obj_id) - self.assertTrue(self._is_corrupted(obj_id)) - self.checker.corrupted_content(obj_id) - self.assertFalse(self._is_corrupted(obj_id)) - - @istest - def repair_content_present_second(self): - # Try to repair a content that is in the backup storage. - content = b'repair_content_present_first' - obj_id = self.checker.objstorage.add(content) - # Add a content to the mock - self.checker.backups[-1].add(content, obj_id) - # Corrupt and repair it. - self._corrupt_content(obj_id) - self.assertTrue(self._is_corrupted(obj_id)) - self.checker.corrupted_content(obj_id) - self.assertFalse(self._is_corrupted(obj_id)) - - @istest - def repair_content_present_distributed(self): - # Try to repair two contents that are in separate backup storages. - content1 = b'repair_content_present_distributed_2' - content2 = b'repair_content_present_distributed_1' - obj_id1 = self.checker.objstorage.add(content1) - obj_id2 = self.checker.objstorage.add(content2) - # Add content to the mock. - self.checker.backups[0].add(content1, obj_id1) - self.checker.backups[1].add(content2, obj_id2) - # Corrupt the contents - self._corrupt_content(obj_id1) - self._corrupt_content(obj_id2) - self.assertTrue(self._is_corrupted(obj_id1)) - self.assertTrue(self._is_corrupted(obj_id2)) - # Repare them - self.checker.corrupted_content(obj_id1) - self.checker.corrupted_content(obj_id2) - self.assertFalse(self._is_corrupted(obj_id1)) - self.assertFalse(self._is_corrupted(obj_id2)) - - @istest - def repair_content_missing(self): - # Try to repair a content that is NOT in the backup storage. - content = b'repair_content_missing' - obj_id = self.checker.objstorage.add(content) - # Corrupt the content - self._corrupt_content(obj_id) - self.assertTrue(self._is_corrupted(obj_id)) - # Try to repair it - self.checker.corrupted_content(obj_id) - self.assertTrue(self._is_corrupted(obj_id)) diff --git a/swh/objstorage/tests/test_multiplexer_filter.py b/swh/objstorage/tests/test_multiplexer_filter.py index da7b62d..b7322f2 100644 --- a/swh/objstorage/tests/test_multiplexer_filter.py +++ b/swh/objstorage/tests/test_multiplexer_filter.py @@ -1,335 +1,331 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tempfile import unittest import random from string import ascii_lowercase from nose.tools import istest -from nose.plugins.attrib import attr from swh.model import hashutil from swh.objstorage.exc import ObjNotFoundError, Error from swh.objstorage import get_objstorage from swh.objstorage.multiplexer.filter import read_only, id_prefix, id_regex def get_random_content(): return bytes(''.join(random.sample(ascii_lowercase, 10)), 'utf8') -@attr('!db') class MixinTestReadFilter(unittest.TestCase): # Read only filter should not allow writing def setUp(self): super().setUp() pstorage = {'cls': 'pathslicing', 'args': {'root': tempfile.mkdtemp(), 'slicing': '0:5'}} base_storage = get_objstorage(**pstorage) base_storage.id = lambda cont: hashutil.hash_data(cont)['sha1'] self.storage = get_objstorage('filtered', {'storage_conf': pstorage, 'filters_conf': [read_only()]}) self.valid_content = b'pre-existing content' self.invalid_content = b'invalid_content' self.true_invalid_content = b'Anything that is not correct' self.absent_content = b'non-existent content' # Create a valid content. self.valid_id = base_storage.add(self.valid_content) # Create an invalid id and add a content with it. self.invalid_id = base_storage.id(self.true_invalid_content) base_storage.add(self.invalid_content, obj_id=self.invalid_id) # Compute an id for a non-existing content. self.absent_id = base_storage.id(self.absent_content) @istest def can_contains(self): self.assertTrue(self.valid_id in self.storage) self.assertTrue(self.invalid_id in self.storage) self.assertFalse(self.absent_id in self.storage) @istest def can_iter(self): self.assertIn(self.valid_id, iter(self.storage)) self.assertIn(self.invalid_id, iter(self.storage)) @istest def can_len(self): self.assertEqual(2, len(self.storage)) @istest def can_get(self): self.assertEqual(self.valid_content, self.storage.get(self.valid_id)) self.assertEqual(self.invalid_content, self.storage.get(self.invalid_id)) @istest def can_check(self): with self.assertRaises(ObjNotFoundError): self.storage.check(self.absent_id) with self.assertRaises(Error): self.storage.check(self.invalid_id) self.storage.check(self.valid_id) @istest def can_get_random(self): self.assertEqual(1, len(list(self.storage.get_random(1)))) self.assertEqual(len(list(self.storage)), len(set(self.storage.get_random(1000)))) @istest def cannot_add(self): new_id = self.storage.add(b'New content') result = self.storage.add(self.valid_content, self.valid_id) self.assertIsNone(new_id, self.storage) self.assertIsNone(result) @istest def cannot_restore(self): result = self.storage.restore(self.valid_content, self.valid_id) self.assertIsNone(result) class MixinTestIdFilter(): """ Mixin class that tests the filters based on filter.IdFilter Methods "make_valid", "make_invalid" and "filter_storage" must be implemented by subclasses. """ def setUp(self): super().setUp() # Use a hack here : as the mock uses the content as id, it is easy to # create contents that are filtered or not. self.prefix = '71' # Make the storage filtered self.sconf = {'cls': 'pathslicing', 'args': {'root': tempfile.mkdtemp(), 'slicing': '0:5'}} storage = get_objstorage(**self.sconf) self.base_storage = storage self.storage = self.filter_storage(self.sconf) # Set the id calculators storage.id = lambda cont: hashutil.hash_data(cont)['sha1'] # Present content with valid id self.present_valid_content = self.ensure_valid(b'yroqdtotji') self.present_valid_id = storage.id(self.present_valid_content) # Present content with invalid id self.present_invalid_content = self.ensure_invalid(b'glxddlmmzb') self.present_invalid_id = storage.id(self.present_invalid_content) # Missing content with valid id self.missing_valid_content = self.ensure_valid(b'rmzkdclkez') self.missing_valid_id = storage.id(self.missing_valid_content) # Missing content with invalid id self.missing_invalid_content = self.ensure_invalid(b'hlejfuginh') self.missing_invalid_id = storage.id(self.missing_invalid_content) # Present corrupted content with valid id self.present_corrupted_valid_content = self.ensure_valid(b'cdsjwnpaij') self.true_present_corrupted_valid_content = self.ensure_valid( b'mgsdpawcrr') self.present_corrupted_valid_id = storage.id( self.true_present_corrupted_valid_content) # Present corrupted content with invalid id self.present_corrupted_invalid_content = self.ensure_invalid( b'pspjljnrco') self.true_present_corrupted_invalid_content = self.ensure_invalid( b'rjocbnnbso') self.present_corrupted_invalid_id = storage.id( self.true_present_corrupted_invalid_content) # Missing (potentially) corrupted content with valid id self.missing_corrupted_valid_content = self.ensure_valid( b'zxkokfgtou') self.true_missing_corrupted_valid_content = self.ensure_valid( b'royoncooqa') self.missing_corrupted_valid_id = storage.id( self.true_missing_corrupted_valid_content) # Missing (potentially) corrupted content with invalid id self.missing_corrupted_invalid_content = self.ensure_invalid( b'hxaxnrmnyk') self.true_missing_corrupted_invalid_content = self.ensure_invalid( b'qhbolyuifr') self.missing_corrupted_invalid_id = storage.id( self.true_missing_corrupted_invalid_content) # Add the content that are supposed to be present self.storage.add(self.present_valid_content) self.storage.add(self.present_invalid_content) self.storage.add(self.present_corrupted_valid_content, obj_id=self.present_corrupted_valid_id) self.storage.add(self.present_corrupted_invalid_content, obj_id=self.present_corrupted_invalid_id) def filter_storage(self, sconf): raise NotImplementedError( 'Id_filter test class must have a filter_storage method') def ensure_valid(self, content=None): if content is None: content = get_random_content() while not self.storage.is_valid(self.base_storage.id(content)): content = get_random_content() return content def ensure_invalid(self, content=None): if content is None: content = get_random_content() while self.storage.is_valid(self.base_storage.id(content)): content = get_random_content() return content @istest def contains(self): # Both contents are present, but the invalid one should be ignored. self.assertTrue(self.present_valid_id in self.storage) self.assertFalse(self.present_invalid_id in self.storage) self.assertFalse(self.missing_valid_id in self.storage) self.assertFalse(self.missing_invalid_id in self.storage) self.assertTrue(self.present_corrupted_valid_id in self.storage) self.assertFalse(self.present_corrupted_invalid_id in self.storage) self.assertFalse(self.missing_corrupted_valid_id in self.storage) self.assertFalse(self.missing_corrupted_invalid_id in self.storage) @istest def iter(self): self.assertIn(self.present_valid_id, iter(self.storage)) self.assertNotIn(self.present_invalid_id, iter(self.storage)) self.assertNotIn(self.missing_valid_id, iter(self.storage)) self.assertNotIn(self.missing_invalid_id, iter(self.storage)) self.assertIn(self.present_corrupted_valid_id, iter(self.storage)) self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage)) self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage)) self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage)) @istest def len(self): # Four contents are present, but only two should be valid. self.assertEqual(2, len(self.storage)) @istest def get(self): self.assertEqual(self.present_valid_content, self.storage.get(self.present_valid_id)) with self.assertRaises(ObjNotFoundError): self.storage.get(self.present_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_invalid_id) self.assertEqual(self.present_corrupted_valid_content, self.storage.get(self.present_corrupted_valid_id)) with self.assertRaises(ObjNotFoundError): self.storage.get(self.present_corrupted_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_corrupted_invalid_id) @istest def check(self): self.storage.check(self.present_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.present_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_invalid_id) with self.assertRaises(Error): self.storage.check(self.present_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.present_corrupted_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_invalid_id) @istest def get_random(self): self.assertEqual(0, len(list(self.storage.get_random(0)))) random_content = list(self.storage.get_random(1000)) self.assertIn(self.present_valid_id, random_content) self.assertNotIn(self.present_invalid_id, random_content) self.assertNotIn(self.missing_valid_id, random_content) self.assertNotIn(self.missing_invalid_id, random_content) self.assertIn(self.present_corrupted_valid_id, random_content) self.assertNotIn(self.present_corrupted_invalid_id, random_content) self.assertNotIn(self.missing_corrupted_valid_id, random_content) self.assertNotIn(self.missing_corrupted_invalid_id, random_content) @istest def add(self): # Add valid and invalid contents to the storage and check their # presence with the unfiltered storage. valid_content = self.ensure_valid(b'ulepsrjbgt') valid_id = self.base_storage.id(valid_content) invalid_content = self.ensure_invalid(b'znvghkjked') invalid_id = self.base_storage.id(invalid_content) self.storage.add(valid_content) self.storage.add(invalid_content) self.assertTrue(valid_id in self.base_storage) self.assertFalse(invalid_id in self.base_storage) @istest def restore(self): # Add corrupted content to the storage and the try to restore it valid_content = self.ensure_valid(b'ulepsrjbgt') valid_id = self.base_storage.id(valid_content) corrupted_content = self.ensure_valid(b'ltjkjsloyb') corrupted_id = self.base_storage.id(corrupted_content) self.storage.add(corrupted_content, obj_id=valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(corrupted_id) with self.assertRaises(Error): self.storage.check(valid_id) self.storage.restore(valid_content) self.storage.check(valid_id) -@attr('!db') class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase): def setUp(self): self.prefix = b'71' super().setUp() def ensure_valid(self, content): obj_id = hashutil.hash_data(content)['sha1'] hex_obj_id = hashutil.hash_to_hex(obj_id) self.assertTrue(hex_obj_id.startswith(self.prefix)) return content def ensure_invalid(self, content): obj_id = hashutil.hash_data(content)['sha1'] hex_obj_id = hashutil.hash_to_hex(obj_id) self.assertFalse(hex_obj_id.startswith(self.prefix)) return content def filter_storage(self, sconf): return get_objstorage('filtered', {'storage_conf': sconf, 'filters_conf': [id_prefix(self.prefix)]}) -@attr('!db') class TestRegexFilter(MixinTestIdFilter, unittest.TestCase): def setUp(self): self.regex = r'[a-f][0-9].*' super().setUp() def filter_storage(self, sconf): return get_objstorage('filtered', {'storage_conf': sconf, 'filters_conf': [id_regex(self.regex)]}) diff --git a/swh/objstorage/tests/test_objstorage_api.py b/swh/objstorage/tests/test_objstorage_api.py index 0070e51..400bd43 100644 --- a/swh/objstorage/tests/test_objstorage_api.py +++ b/swh/objstorage/tests/test_objstorage_api.py @@ -1,37 +1,34 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tempfile import unittest -from nose.plugins.attrib import attr - from swh.objstorage import get_objstorage from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture from swh.objstorage.tests.server_testing import ServerTestFixture from swh.objstorage.api.server import make_app -@attr('db') class TestRemoteObjStorage(ServerTestFixture, ObjStorageTestFixture, unittest.TestCase): """ Test the remote archive API. """ def setUp(self): self.config = { 'cls': 'pathslicing', 'args': { 'root': tempfile.mkdtemp(), 'slicing': '0:1/0:5', }, 'client_max_size': 8 * 1024 * 1024, } self.app = make_app(self.config) super().setUp() self.storage = get_objstorage('remote', { 'url': self.url() }) diff --git a/version.txt b/version.txt index 8bc3a77..7bb5ff3 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.21-0-ge70f532 \ No newline at end of file +v0.0.22-0-g4412792 \ No newline at end of file