diff --git a/swh/objstorage/tests/test_multiplexer_filter.py b/swh/objstorage/tests/test_multiplexer_filter.py index b7322f2..64c55db 100644 --- a/swh/objstorage/tests/test_multiplexer_filter.py +++ b/swh/objstorage/tests/test_multiplexer_filter.py @@ -1,331 +1,342 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import random +import shutil import tempfile import unittest -import random from string import ascii_lowercase from nose.tools import istest from swh.model import hashutil from swh.objstorage.exc import ObjNotFoundError, Error from swh.objstorage import get_objstorage from swh.objstorage.multiplexer.filter import read_only, id_prefix, id_regex def get_random_content(): return bytes(''.join(random.sample(ascii_lowercase, 10)), 'utf8') class MixinTestReadFilter(unittest.TestCase): # Read only filter should not allow writing def setUp(self): super().setUp() + self.tmpdir = tempfile.mkdtemp() pstorage = {'cls': 'pathslicing', - 'args': {'root': tempfile.mkdtemp(), + 'args': {'root': self.tmpdir, 'slicing': '0:5'}} base_storage = get_objstorage(**pstorage) base_storage.id = lambda cont: hashutil.hash_data(cont)['sha1'] self.storage = get_objstorage('filtered', {'storage_conf': pstorage, 'filters_conf': [read_only()]}) self.valid_content = b'pre-existing content' self.invalid_content = b'invalid_content' self.true_invalid_content = b'Anything that is not correct' self.absent_content = b'non-existent content' # Create a valid content. self.valid_id = base_storage.add(self.valid_content) # Create an invalid id and add a content with it. self.invalid_id = base_storage.id(self.true_invalid_content) base_storage.add(self.invalid_content, obj_id=self.invalid_id) # Compute an id for a non-existing content. self.absent_id = base_storage.id(self.absent_content) + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmpdir) + @istest def can_contains(self): self.assertTrue(self.valid_id in self.storage) self.assertTrue(self.invalid_id in self.storage) self.assertFalse(self.absent_id in self.storage) @istest def can_iter(self): self.assertIn(self.valid_id, iter(self.storage)) self.assertIn(self.invalid_id, iter(self.storage)) @istest def can_len(self): self.assertEqual(2, len(self.storage)) @istest def can_get(self): self.assertEqual(self.valid_content, self.storage.get(self.valid_id)) self.assertEqual(self.invalid_content, self.storage.get(self.invalid_id)) @istest def can_check(self): with self.assertRaises(ObjNotFoundError): self.storage.check(self.absent_id) with self.assertRaises(Error): self.storage.check(self.invalid_id) self.storage.check(self.valid_id) @istest def can_get_random(self): self.assertEqual(1, len(list(self.storage.get_random(1)))) self.assertEqual(len(list(self.storage)), len(set(self.storage.get_random(1000)))) @istest def cannot_add(self): new_id = self.storage.add(b'New content') result = self.storage.add(self.valid_content, self.valid_id) self.assertIsNone(new_id, self.storage) self.assertIsNone(result) @istest def cannot_restore(self): result = self.storage.restore(self.valid_content, self.valid_id) self.assertIsNone(result) class MixinTestIdFilter(): """ Mixin class that tests the filters based on filter.IdFilter Methods "make_valid", "make_invalid" and "filter_storage" must be implemented by subclasses. """ def setUp(self): super().setUp() # Use a hack here : as the mock uses the content as id, it is easy to # create contents that are filtered or not. self.prefix = '71' + self.tmpdir = tempfile.mkdtemp() # Make the storage filtered self.sconf = {'cls': 'pathslicing', - 'args': {'root': tempfile.mkdtemp(), + 'args': {'root': self.tmpdir, 'slicing': '0:5'}} storage = get_objstorage(**self.sconf) self.base_storage = storage self.storage = self.filter_storage(self.sconf) # Set the id calculators storage.id = lambda cont: hashutil.hash_data(cont)['sha1'] # Present content with valid id self.present_valid_content = self.ensure_valid(b'yroqdtotji') self.present_valid_id = storage.id(self.present_valid_content) # Present content with invalid id self.present_invalid_content = self.ensure_invalid(b'glxddlmmzb') self.present_invalid_id = storage.id(self.present_invalid_content) # Missing content with valid id self.missing_valid_content = self.ensure_valid(b'rmzkdclkez') self.missing_valid_id = storage.id(self.missing_valid_content) # Missing content with invalid id self.missing_invalid_content = self.ensure_invalid(b'hlejfuginh') self.missing_invalid_id = storage.id(self.missing_invalid_content) # Present corrupted content with valid id self.present_corrupted_valid_content = self.ensure_valid(b'cdsjwnpaij') self.true_present_corrupted_valid_content = self.ensure_valid( b'mgsdpawcrr') self.present_corrupted_valid_id = storage.id( self.true_present_corrupted_valid_content) # Present corrupted content with invalid id self.present_corrupted_invalid_content = self.ensure_invalid( b'pspjljnrco') self.true_present_corrupted_invalid_content = self.ensure_invalid( b'rjocbnnbso') self.present_corrupted_invalid_id = storage.id( self.true_present_corrupted_invalid_content) # Missing (potentially) corrupted content with valid id self.missing_corrupted_valid_content = self.ensure_valid( b'zxkokfgtou') self.true_missing_corrupted_valid_content = self.ensure_valid( b'royoncooqa') self.missing_corrupted_valid_id = storage.id( self.true_missing_corrupted_valid_content) # Missing (potentially) corrupted content with invalid id self.missing_corrupted_invalid_content = self.ensure_invalid( b'hxaxnrmnyk') self.true_missing_corrupted_invalid_content = self.ensure_invalid( b'qhbolyuifr') self.missing_corrupted_invalid_id = storage.id( self.true_missing_corrupted_invalid_content) # Add the content that are supposed to be present self.storage.add(self.present_valid_content) self.storage.add(self.present_invalid_content) self.storage.add(self.present_corrupted_valid_content, obj_id=self.present_corrupted_valid_id) self.storage.add(self.present_corrupted_invalid_content, obj_id=self.present_corrupted_invalid_id) + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmpdir) + def filter_storage(self, sconf): raise NotImplementedError( 'Id_filter test class must have a filter_storage method') def ensure_valid(self, content=None): if content is None: content = get_random_content() while not self.storage.is_valid(self.base_storage.id(content)): content = get_random_content() return content def ensure_invalid(self, content=None): if content is None: content = get_random_content() while self.storage.is_valid(self.base_storage.id(content)): content = get_random_content() return content @istest def contains(self): # Both contents are present, but the invalid one should be ignored. self.assertTrue(self.present_valid_id in self.storage) self.assertFalse(self.present_invalid_id in self.storage) self.assertFalse(self.missing_valid_id in self.storage) self.assertFalse(self.missing_invalid_id in self.storage) self.assertTrue(self.present_corrupted_valid_id in self.storage) self.assertFalse(self.present_corrupted_invalid_id in self.storage) self.assertFalse(self.missing_corrupted_valid_id in self.storage) self.assertFalse(self.missing_corrupted_invalid_id in self.storage) @istest def iter(self): self.assertIn(self.present_valid_id, iter(self.storage)) self.assertNotIn(self.present_invalid_id, iter(self.storage)) self.assertNotIn(self.missing_valid_id, iter(self.storage)) self.assertNotIn(self.missing_invalid_id, iter(self.storage)) self.assertIn(self.present_corrupted_valid_id, iter(self.storage)) self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage)) self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage)) self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage)) @istest def len(self): # Four contents are present, but only two should be valid. self.assertEqual(2, len(self.storage)) @istest def get(self): self.assertEqual(self.present_valid_content, self.storage.get(self.present_valid_id)) with self.assertRaises(ObjNotFoundError): self.storage.get(self.present_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_invalid_id) self.assertEqual(self.present_corrupted_valid_content, self.storage.get(self.present_corrupted_valid_id)) with self.assertRaises(ObjNotFoundError): self.storage.get(self.present_corrupted_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_corrupted_invalid_id) @istest def check(self): self.storage.check(self.present_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.present_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_invalid_id) with self.assertRaises(Error): self.storage.check(self.present_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.present_corrupted_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_invalid_id) @istest def get_random(self): self.assertEqual(0, len(list(self.storage.get_random(0)))) random_content = list(self.storage.get_random(1000)) self.assertIn(self.present_valid_id, random_content) self.assertNotIn(self.present_invalid_id, random_content) self.assertNotIn(self.missing_valid_id, random_content) self.assertNotIn(self.missing_invalid_id, random_content) self.assertIn(self.present_corrupted_valid_id, random_content) self.assertNotIn(self.present_corrupted_invalid_id, random_content) self.assertNotIn(self.missing_corrupted_valid_id, random_content) self.assertNotIn(self.missing_corrupted_invalid_id, random_content) @istest def add(self): # Add valid and invalid contents to the storage and check their # presence with the unfiltered storage. valid_content = self.ensure_valid(b'ulepsrjbgt') valid_id = self.base_storage.id(valid_content) invalid_content = self.ensure_invalid(b'znvghkjked') invalid_id = self.base_storage.id(invalid_content) self.storage.add(valid_content) self.storage.add(invalid_content) self.assertTrue(valid_id in self.base_storage) self.assertFalse(invalid_id in self.base_storage) @istest def restore(self): # Add corrupted content to the storage and the try to restore it valid_content = self.ensure_valid(b'ulepsrjbgt') valid_id = self.base_storage.id(valid_content) corrupted_content = self.ensure_valid(b'ltjkjsloyb') corrupted_id = self.base_storage.id(corrupted_content) self.storage.add(corrupted_content, obj_id=valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(corrupted_id) with self.assertRaises(Error): self.storage.check(valid_id) self.storage.restore(valid_content) self.storage.check(valid_id) class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase): def setUp(self): self.prefix = b'71' super().setUp() def ensure_valid(self, content): obj_id = hashutil.hash_data(content)['sha1'] hex_obj_id = hashutil.hash_to_hex(obj_id) self.assertTrue(hex_obj_id.startswith(self.prefix)) return content def ensure_invalid(self, content): obj_id = hashutil.hash_data(content)['sha1'] hex_obj_id = hashutil.hash_to_hex(obj_id) self.assertFalse(hex_obj_id.startswith(self.prefix)) return content def filter_storage(self, sconf): return get_objstorage('filtered', {'storage_conf': sconf, 'filters_conf': [id_prefix(self.prefix)]}) class TestRegexFilter(MixinTestIdFilter, unittest.TestCase): def setUp(self): self.regex = r'[a-f][0-9].*' super().setUp() def filter_storage(self, sconf): return get_objstorage('filtered', {'storage_conf': sconf, 'filters_conf': [id_regex(self.regex)]}) diff --git a/swh/objstorage/tests/test_objstorage_api.py b/swh/objstorage/tests/test_objstorage_api.py index c482a8e..0fc1d9d 100644 --- a/swh/objstorage/tests/test_objstorage_api.py +++ b/swh/objstorage/tests/test_objstorage_api.py @@ -1,35 +1,41 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import shutil import tempfile import unittest from swh.core.tests.server_testing import ServerTestFixtureAsync from swh.objstorage import get_objstorage from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture from swh.objstorage.api.server import make_app class TestRemoteObjStorage(ServerTestFixtureAsync, ObjStorageTestFixture, unittest.TestCase): """ Test the remote archive API. """ def setUp(self): + self.tmpdir = tempfile.mkdtemp() self.config = { 'cls': 'pathslicing', 'args': { - 'root': tempfile.mkdtemp(), + 'root': self.tmpdir, 'slicing': '0:1/0:5', 'allow_delete': True, }, 'client_max_size': 8 * 1024 * 1024, } self.app = make_app(self.config) super().setUp() self.storage = get_objstorage('remote', { 'url': self.url() }) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmpdir) diff --git a/swh/objstorage/tests/test_objstorage_instantiation.py b/swh/objstorage/tests/test_objstorage_instantiation.py index a515855..4a1e10a 100644 --- a/swh/objstorage/tests/test_objstorage_instantiation.py +++ b/swh/objstorage/tests/test_objstorage_instantiation.py @@ -1,46 +1,53 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import shutil import tempfile import unittest from nose.tools import istest from swh.objstorage import get_objstorage from swh.objstorage.objstorage_pathslicing import PathSlicingObjStorage from swh.objstorage.api.client import RemoteObjStorage class TestObjStorageInitialization(unittest.TestCase): """ Test that the methods for ObjStorage initializations with `get_objstorage` works properly. """ def setUp(self): self.path = tempfile.mkdtemp() + self.path2 = tempfile.mkdtemp() # Server is launched at self.url() - self.config = {'storage_base': tempfile.mkdtemp(), + self.config = {'storage_base': self.path2, 'storage_slicing': '0:1/0:5'} super().setUp() + def tearDown(self): + super().tearDown() + shutil.rmtree(self.path) + shutil.rmtree(self.path2) + @istest def pathslicing_objstorage(self): conf = { 'cls': 'pathslicing', 'args': {'root': self.path, 'slicing': '0:2/0:5'} } st = get_objstorage(**conf) self.assertTrue(isinstance(st, PathSlicingObjStorage)) @istest def remote_objstorage(self): conf = { 'cls': 'remote', 'args': { 'url': 'http://127.0.0.1:4242/' } } st = get_objstorage(**conf) self.assertTrue(isinstance(st, RemoteObjStorage)) diff --git a/swh/objstorage/tests/test_objstorage_multiplexer.py b/swh/objstorage/tests/test_objstorage_multiplexer.py index 3aaa7b9..d324736 100644 --- a/swh/objstorage/tests/test_objstorage_multiplexer.py +++ b/swh/objstorage/tests/test_objstorage_multiplexer.py @@ -1,90 +1,101 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import os +import shutil import tempfile import unittest from nose.tools import istest from swh.objstorage import PathSlicingObjStorage from swh.objstorage.multiplexer import MultiplexerObjStorage from swh.objstorage.multiplexer.filter import add_filter, read_only from objstorage_testing import ObjStorageTestFixture class TestMultiplexerObjStorage(ObjStorageTestFixture, unittest.TestCase): def setUp(self): super().setUp() - self.storage_v1 = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4') - self.storage_v2 = PathSlicingObjStorage(tempfile.mkdtemp(), '0:1/0:5') + self.tmpdir = tempfile.mkdtemp() + os.mkdir(os.path.join(self.tmpdir, 'root1')) + os.mkdir(os.path.join(self.tmpdir, 'root2')) + self.storage_v1 = PathSlicingObjStorage( + os.path.join(self.tmpdir, 'root1'), '0:2/2:4') + self.storage_v2 = PathSlicingObjStorage( + os.path.join(self.tmpdir, 'root2'), '0:1/0:5') self.r_storage = add_filter(self.storage_v1, read_only()) self.w_storage = self.storage_v2 self.storage = MultiplexerObjStorage([self.r_storage, self.w_storage]) + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmpdir) + @istest def contains(self): content_p, obj_id_p = self.hash_content(b'contains_present') content_m, obj_id_m = self.hash_content(b'contains_missing') self.storage.add(content_p, obj_id=obj_id_p) self.assertIn(obj_id_p, self.storage) self.assertNotIn(obj_id_m, self.storage) @istest def iter(self): content, obj_id = self.hash_content(b'iter') self.assertEqual(list(iter(self.storage)), []) self.storage.add(content, obj_id=obj_id) self.assertEqual(list(iter(self.storage)), [obj_id]) @istest def len(self): content, obj_id = self.hash_content(b'len') self.assertEqual(len(self.storage), 0) self.storage.add(content, obj_id=obj_id) self.assertEqual(len(self.storage), 1) @istest def len_multiple(self): content, obj_id = self.hash_content(b'len_multiple') # Add a content to the read-only storage self.storage_v1.add(content) self.assertEqual(len(self.storage), 1) # By adding the same content to the global storage, it should be # Replicated. # len() behavior is to indicates the number of files, not unique # contents. self.storage.add(content) self.assertEqual(len(self.storage), 2) @istest def delete_missing(self): self.storage_v1.allow_delete = True self.storage_v2.allow_delete = True super().delete_missing() @istest def delete_present(self): self.storage_v1.allow_delete = True self.storage_v2.allow_delete = True super().delete_present() @istest def get_random_contents(self): content, obj_id = self.hash_content(b'get_random_content') self.storage.add(content) random_contents = list(self.storage.get_random(1)) self.assertEqual(1, len(random_contents)) self.assertIn(obj_id, random_contents) @istest def access_readonly(self): # Add a content to the readonly storage content, obj_id = self.hash_content(b'content in read-only') self.storage_v1.add(content) # Try to retrieve it on the main storage self.assertIn(obj_id, self.storage) diff --git a/swh/objstorage/tests/test_objstorage_pathslicing.py b/swh/objstorage/tests/test_objstorage_pathslicing.py index 8e9c4a0..6d2c134 100644 --- a/swh/objstorage/tests/test_objstorage_pathslicing.py +++ b/swh/objstorage/tests/test_objstorage_pathslicing.py @@ -1,71 +1,76 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import shutil import tempfile import unittest from nose.tools import istest from swh.model import hashutil from swh.objstorage import exc from swh.objstorage import get_objstorage from objstorage_testing import ObjStorageTestFixture class TestPathSlicingObjStorage(ObjStorageTestFixture, unittest.TestCase): def setUp(self): super().setUp() self.slicing = '0:2/2:4/4:6' self.tmpdir = tempfile.mkdtemp() self.storage = get_objstorage( 'pathslicing', {'root': self.tmpdir, 'slicing': self.slicing} ) + def tearDown(self): + super().tearDown() + shutil.rmtree(self.tmpdir) + def content_path(self, obj_id): hex_obj_id = hashutil.hash_to_hex(obj_id) return self.storage._obj_path(hex_obj_id) @istest def iter(self): content, obj_id = self.hash_content(b'iter') self.assertEqual(list(iter(self.storage)), []) self.storage.add(content, obj_id=obj_id) self.assertEqual(list(iter(self.storage)), [obj_id]) @istest def len(self): content, obj_id = self.hash_content(b'len') self.assertEqual(len(self.storage), 0) self.storage.add(content, obj_id=obj_id) self.assertEqual(len(self.storage), 1) @istest def check_not_gzip(self): content, obj_id = self.hash_content(b'check_not_gzip') self.storage.add(content, obj_id=obj_id) with open(self.content_path(obj_id), 'ab') as f: # Add garbage. f.write(b'garbage') with self.assertRaises(exc.Error): self.storage.check(obj_id) @istest def check_id_mismatch(self): content, obj_id = self.hash_content(b'check_id_mismatch') self.storage.add(content, obj_id=obj_id) with open(self.content_path(obj_id), 'wb') as f: f.write(b'unexpected content') with self.assertRaises(exc.Error): self.storage.check(obj_id) @istest def get_random_contents(self): content, obj_id = self.hash_content(b'get_random_content') self.storage.add(content, obj_id=obj_id) random_contents = list(self.storage.get_random(1)) self.assertEqual(1, len(random_contents)) self.assertIn(obj_id, random_contents)