diff --git a/debian/control b/debian/control index 040aff3..998c410 100644 --- a/debian/control +++ b/debian/control @@ -1,25 +1,25 @@ Source: swh-archiver Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-all, python3-click, python3-nose, python3-psycopg2, python3-setuptools, python3-swh.core (>= 0.0.44), python3-swh.journal (>= 0.0.2), - python3-swh.model (>= 0.0.15), + python3-swh.model (>= 0.0.27), python3-swh.objstorage (>= 0.0.17), python3-swh.scheduler (>= 0.0.32), python3-swh.storage (>= 0.0.102), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/source/swh-archiver/ Package: python3-swh.archiver Architecture: all Depends: ${misc:Depends}, ${python3:Depends} Description: Software Heritage Archiver diff --git a/requirements-swh.txt b/requirements-swh.txt index 0c489da..c578aa3 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,6 +1,6 @@ swh.core >= 0.0.44 swh.journal >= 0.0.2 -swh.model >= 0.0.15 +swh.model >= 0.0.27 swh.objstorage >= 0.0.17 swh.scheduler >= 0.0.32 swh.storage >= 0.0.102 diff --git a/swh/archiver/tests/test_checker.py b/swh/archiver/tests/test_checker.py index 9529766..7001a5d 100644 --- a/swh/archiver/tests/test_checker.py +++ b/swh/archiver/tests/test_checker.py @@ -1,147 +1,149 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import gzip import tempfile import unittest import pytest from swh.archiver.checker import RepairContentChecker from swh.model import hashutil from swh.objstorage.exc import ObjNotFoundError class MockBackupObjStorage(): def __init__(self): self.values = {} def add(self, value, obj_id): self.values[obj_id] = value def get(self, obj_id): try: return self.values[obj_id] except KeyError: raise ObjNotFoundError(obj_id) @pytest.mark.fs class TestRepairChecker(unittest.TestCase): """ Test the content integrity checker """ def setUp(self): super().setUp() self._alter_config() self.checker = RepairContentChecker() self.checker.backups = [MockBackupObjStorage(), MockBackupObjStorage()] def _alter_config(self): RepairContentChecker.parse_config_file = ( lambda cls: { 'storage': {'cls': 'pathslicing', 'args': {'root': tempfile.mkdtemp(), 'slicing': '0:2/2:4/4:6'}}, 'batch_size': 1000, 'log_tag': 'objstorage_test', 'backup_storages': {} } ) def _corrupt_content(self, obj_id): """ Make the given content invalid. """ hex_obj_id = hashutil.hash_to_hex(obj_id) file_path = self.checker.objstorage._obj_path(hex_obj_id) with gzip.open(file_path, 'wb') as f: f.write(b'Unexpected content') def _is_corrupted(self, obj_id): """ Ensure the given object is corrupted """ return self.checker._check_content(obj_id) == 'corrupted' def _is_missing(self, obj_id): """ Ensure the given object is missing """ return self.checker._check_content(obj_id) == 'missing' def test_check_valid_content(self): # Check that a valid content is valid. content = b'check_valid_content' obj_id = self.checker.objstorage.add(content) self.assertFalse(self._is_corrupted(obj_id)) self.assertFalse(self._is_missing(obj_id)) def test_check_corrupted_content(self): # Check that an invalid content is noticed. content = b'check_corrupted_content' obj_id = self.checker.objstorage.add(content) self._corrupt_content(obj_id) self.assertTrue(self._is_corrupted(obj_id)) self.assertFalse(self._is_missing(obj_id)) def test_check_missing_content(self): - obj_id = hashutil.hash_data(b'check_missing_content')['sha1'] + hashes = hashutil.MultiHash.from_data( + b'check_missing_content', hash_names=['sha1']).digest() + obj_id = hashes['sha1'] self.assertFalse(self._is_corrupted(obj_id)) self.assertTrue(self._is_missing(obj_id)) def test_repair_content_present_first(self): # Try to repair a content that is in the backup storage. content = b'repair_content_present_first' obj_id = self.checker.objstorage.add(content) # Add a content to the mock self.checker.backups[0].add(content, obj_id) # Corrupt and repair it. self._corrupt_content(obj_id) self.assertTrue(self._is_corrupted(obj_id)) self.checker.corrupted_content(obj_id) self.assertFalse(self._is_corrupted(obj_id)) def test_repair_content_present_second(self): # Try to repair a content that is in the backup storage. content = b'repair_content_present_first' obj_id = self.checker.objstorage.add(content) # Add a content to the mock self.checker.backups[-1].add(content, obj_id) # Corrupt and repair it. self._corrupt_content(obj_id) self.assertTrue(self._is_corrupted(obj_id)) self.checker.corrupted_content(obj_id) self.assertFalse(self._is_corrupted(obj_id)) def test_repair_content_present_distributed(self): # Try to repair two contents that are in separate backup storages. content1 = b'repair_content_present_distributed_2' content2 = b'repair_content_present_distributed_1' obj_id1 = self.checker.objstorage.add(content1) obj_id2 = self.checker.objstorage.add(content2) # Add content to the mock. self.checker.backups[0].add(content1, obj_id1) self.checker.backups[1].add(content2, obj_id2) # Corrupt the contents self._corrupt_content(obj_id1) self._corrupt_content(obj_id2) self.assertTrue(self._is_corrupted(obj_id1)) self.assertTrue(self._is_corrupted(obj_id2)) # Repare them self.checker.corrupted_content(obj_id1) self.checker.corrupted_content(obj_id2) self.assertFalse(self._is_corrupted(obj_id1)) self.assertFalse(self._is_corrupted(obj_id2)) def test_repair_content_missing(self): # Try to repair a content that is NOT in the backup storage. content = b'repair_content_missing' obj_id = self.checker.objstorage.add(content) # Corrupt the content self._corrupt_content(obj_id) self.assertTrue(self._is_corrupted(obj_id)) # Try to repair it self.checker.corrupted_content(obj_id) self.assertTrue(self._is_corrupted(obj_id))