diff --git a/swh/objstorage/tests/test_multiplexer_filter.py b/swh/objstorage/tests/test_multiplexer_filter.py index 79bfe02..f31af11 100644 --- a/swh/objstorage/tests/test_multiplexer_filter.py +++ b/swh/objstorage/tests/test_multiplexer_filter.py @@ -1,373 +1,336 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import random +import tempfile import unittest +import random from string import ascii_lowercase from nose.tools import istest from nose.plugins.attrib import attr from swh.core import hashutil from swh.objstorage.exc import ObjNotFoundError, Error -from swh.objstorage import ObjStorage -from swh.objstorage.multiplexer.filter import (add_filter, read_only, - id_prefix, id_regex) +from swh.objstorage import get_objstorage +from swh.objstorage.multiplexer.filter import read_only, id_prefix, id_regex def get_random_content(): return bytes(''.join(random.sample(ascii_lowercase, 10)), 'utf8') -class MockObjStorage(ObjStorage): - """ Mock an object storage for testing the filters. - """ - def __init__(self): - self.objects = {} - - def __contains__(self, obj_id): - return obj_id in self.objects - - def __len__(self): - return len(self.objects) - - def __iter__(self): - return iter(self.objects) - - def id(self, content): - # Id is the content itself for easily choose the id of - # a content for filtering. - return hashutil.hashdata(content)['sha1'] - - def add(self, content, obj_id=None, check_presence=True): - if obj_id is None: - obj_id = self.id(content) - - if check_presence and obj_id in self.objects: - return obj_id - - self.objects[obj_id] = content - return obj_id - - def restore(self, content, obj_id=None): - return self.add(content, obj_id, check_presence=False) - - def get(self, obj_id): - if obj_id not in self: - raise ObjNotFoundError(obj_id) - return self.objects[obj_id] - - def check(self, obj_id): - if obj_id not in self: - raise ObjNotFoundError(obj_id) - if obj_id != self.id(self.objects[obj_id]): - raise Error(obj_id) - - def get_random(self, batch_size): - batch_size = min(len(self), batch_size) - return random.sample(list(self.objects), batch_size) - - @attr('!db') class MixinTestReadFilter(unittest.TestCase): # Read only filter should not allow writing def setUp(self): super().setUp() - storage = MockObjStorage() - + pstorage = {'cls': 'pathslicing', + 'args': {'root': tempfile.mkdtemp(), + 'slicing': '0:5'}} + base_storage = get_objstorage(**pstorage) + base_storage.id = lambda cont: hashutil.hashdata(cont)['sha1'] + self.storage = get_objstorage('filtered', + {'storage_conf': pstorage, + 'filters_conf': [read_only()]}) self.valid_content = b'pre-existing content' self.invalid_content = b'invalid_content' self.true_invalid_content = b'Anything that is not correct' self.absent_content = b'non-existent content' # Create a valid content. - self.valid_id = storage.add(self.valid_content) + self.valid_id = base_storage.add(self.valid_content) # Create an invalid id and add a content with it. - self.invalid_id = storage.id(self.true_invalid_content) - storage.add(self.invalid_content, obj_id=self.invalid_id) + self.invalid_id = base_storage.id(self.true_invalid_content) + base_storage.add(self.invalid_content, obj_id=self.invalid_id) # Compute an id for a non-existing content. - self.absent_id = storage.id(self.absent_content) - - self.storage = add_filter(storage, read_only()) + self.absent_id = base_storage.id(self.absent_content) @istest def can_contains(self): self.assertTrue(self.valid_id in self.storage) self.assertTrue(self.invalid_id in self.storage) self.assertFalse(self.absent_id in self.storage) @istest def can_iter(self): self.assertIn(self.valid_id, iter(self.storage)) self.assertIn(self.invalid_id, iter(self.storage)) @istest def can_len(self): self.assertEqual(2, len(self.storage)) @istest def can_get(self): self.assertEqual(self.valid_content, self.storage.get(self.valid_id)) self.assertEqual(self.invalid_content, self.storage.get(self.invalid_id)) @istest def can_check(self): with self.assertRaises(ObjNotFoundError): self.storage.check(self.absent_id) with self.assertRaises(Error): self.storage.check(self.invalid_id) self.storage.check(self.valid_id) @istest def can_get_random(self): - self.assertEqual(1, len(self.storage.get_random(1))) - self.assertEqual(len(self.storage), len(self.storage.get_random(1000))) + self.assertEqual(1, len(list(self.storage.get_random(1)))) + print(list(self.storage.get_random(1000))) + self.assertEqual(len(list(self.storage)), + len(set(self.storage.get_random(1000)))) @istest def cannot_add(self): new_id = self.storage.add(b'New content') result = self.storage.add(self.valid_content, self.valid_id) - self.assertNotIn(new_id, self.storage) + self.assertIsNone(new_id, self.storage) self.assertIsNone(result) @istest def cannot_restore(self): - new_id = self.storage.restore(b'New content') result = self.storage.restore(self.valid_content, self.valid_id) - self.assertNotIn(new_id, self.storage) self.assertIsNone(result) class MixinTestIdFilter(): """ Mixin class that tests the filters based on filter.IdFilter Methods "make_valid", "make_invalid" and "filter_storage" must be implemented by subclasses. """ def setUp(self): super().setUp() # Use a hack here : as the mock uses the content as id, it is easy to # create contents that are filtered or not. self.prefix = '71' - storage = MockObjStorage() - # Make the storage filtered + self.sconf = {'cls': 'pathslicing', + 'args': {'root': tempfile.mkdtemp(), + 'slicing': '0:5'}} + storage = get_objstorage(**self.sconf) self.base_storage = storage - self.storage = self.filter_storage(storage) + self.storage = self.filter_storage(self.sconf) + # Set the id calculators + storage.id = lambda cont: hashutil.hashdata(cont)['sha1'] # Present content with valid id self.present_valid_content = self.ensure_valid(b'yroqdtotji') self.present_valid_id = storage.id(self.present_valid_content) # Present content with invalid id self.present_invalid_content = self.ensure_invalid(b'glxddlmmzb') self.present_invalid_id = storage.id(self.present_invalid_content) # Missing content with valid id self.missing_valid_content = self.ensure_valid(b'rmzkdclkez') self.missing_valid_id = storage.id(self.missing_valid_content) # Missing content with invalid id self.missing_invalid_content = self.ensure_invalid(b'hlejfuginh') self.missing_invalid_id = storage.id(self.missing_invalid_content) # Present corrupted content with valid id self.present_corrupted_valid_content = self.ensure_valid(b'cdsjwnpaij') self.true_present_corrupted_valid_content = self.ensure_valid( b'mgsdpawcrr') self.present_corrupted_valid_id = storage.id( self.true_present_corrupted_valid_content) # Present corrupted content with invalid id self.present_corrupted_invalid_content = self.ensure_invalid( b'pspjljnrco') self.true_present_corrupted_invalid_content = self.ensure_invalid( b'rjocbnnbso') self.present_corrupted_invalid_id = storage.id( self.true_present_corrupted_invalid_content) # Missing (potentially) corrupted content with valid id self.missing_corrupted_valid_content = self.ensure_valid( b'zxkokfgtou') self.true_missing_corrupted_valid_content = self.ensure_valid( b'royoncooqa') self.missing_corrupted_valid_id = storage.id( self.true_missing_corrupted_valid_content) # Missing (potentially) corrupted content with invalid id self.missing_corrupted_invalid_content = self.ensure_invalid( b'hxaxnrmnyk') self.true_missing_corrupted_invalid_content = self.ensure_invalid( b'qhbolyuifr') self.missing_corrupted_invalid_id = storage.id( self.true_missing_corrupted_invalid_content) # Add the content that are supposed to be present - storage.add(self.present_valid_content) - storage.add(self.present_invalid_content) - storage.add(self.present_corrupted_valid_content, - obj_id=self.present_corrupted_valid_id) - storage.add(self.present_corrupted_invalid_content, - obj_id=self.present_corrupted_invalid_id) - - def filter_storage(self, storage): + self.storage.add(self.present_valid_content) + self.storage.add(self.present_invalid_content) + self.storage.add(self.present_corrupted_valid_content, + obj_id=self.present_corrupted_valid_id) + self.storage.add(self.present_corrupted_invalid_content, + obj_id=self.present_corrupted_invalid_id) + + def filter_storage(self, sconf): raise NotImplementedError( 'Id_filter test class must have a filter_storage method') def ensure_valid(self, content=None): if content is None: content = get_random_content() while not self.storage.is_valid(self.base_storage.id(content)): content = get_random_content() return content def ensure_invalid(self, content=None): if content is None: content = get_random_content() while self.storage.is_valid(self.base_storage.id(content)): content = get_random_content() return content @istest def contains(self): # Both contents are present, but the invalid one should be ignored. self.assertTrue(self.present_valid_id in self.storage) self.assertFalse(self.present_invalid_id in self.storage) self.assertFalse(self.missing_valid_id in self.storage) self.assertFalse(self.missing_invalid_id in self.storage) self.assertTrue(self.present_corrupted_valid_id in self.storage) self.assertFalse(self.present_corrupted_invalid_id in self.storage) self.assertFalse(self.missing_corrupted_valid_id in self.storage) self.assertFalse(self.missing_corrupted_invalid_id in self.storage) @istest def iter(self): self.assertIn(self.present_valid_id, iter(self.storage)) self.assertNotIn(self.present_invalid_id, iter(self.storage)) self.assertNotIn(self.missing_valid_id, iter(self.storage)) self.assertNotIn(self.missing_invalid_id, iter(self.storage)) self.assertIn(self.present_corrupted_valid_id, iter(self.storage)) self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage)) self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage)) self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage)) @istest def len(self): # Four contents are present, but only two should be valid. self.assertEqual(2, len(self.storage)) @istest def get(self): self.assertEqual(self.present_valid_content, self.storage.get(self.present_valid_id)) with self.assertRaises(ObjNotFoundError): self.storage.get(self.present_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_invalid_id) self.assertEqual(self.present_corrupted_valid_content, self.storage.get(self.present_corrupted_valid_id)) with self.assertRaises(ObjNotFoundError): self.storage.get(self.present_corrupted_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.get(self.missing_corrupted_invalid_id) @istest def check(self): self.storage.check(self.present_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.present_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_invalid_id) with self.assertRaises(Error): self.storage.check(self.present_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.present_corrupted_invalid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(self.missing_corrupted_invalid_id) @istest def get_random(self): self.assertEqual(0, len(list(self.storage.get_random(0)))) random_content = list(self.storage.get_random(1000)) self.assertIn(self.present_valid_id, random_content) self.assertNotIn(self.present_invalid_id, random_content) self.assertNotIn(self.missing_valid_id, random_content) self.assertNotIn(self.missing_invalid_id, random_content) self.assertIn(self.present_corrupted_valid_id, random_content) self.assertNotIn(self.present_corrupted_invalid_id, random_content) self.assertNotIn(self.missing_corrupted_valid_id, random_content) self.assertNotIn(self.missing_corrupted_invalid_id, random_content) @istest def add(self): # Add valid and invalid contents to the storage and check their # presence with the unfiltered storage. valid_content = self.ensure_valid(b'ulepsrjbgt') valid_id = self.base_storage.id(valid_content) invalid_content = self.ensure_invalid(b'znvghkjked') invalid_id = self.base_storage.id(invalid_content) self.storage.add(valid_content) self.storage.add(invalid_content) self.assertTrue(valid_id in self.base_storage) self.assertFalse(invalid_id in self.base_storage) @istest def restore(self): # Add corrupted content to the storage and the try to restore it valid_content = self.ensure_valid(b'ulepsrjbgt') valid_id = self.base_storage.id(valid_content) corrupted_content = self.ensure_valid(b'ltjkjsloyb') corrupted_id = self.base_storage.id(corrupted_content) self.storage.add(corrupted_content, obj_id=valid_id) with self.assertRaises(ObjNotFoundError): self.storage.check(corrupted_id) with self.assertRaises(Error): self.storage.check(valid_id) self.storage.restore(valid_content) self.storage.check(valid_id) @attr('!db') class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase): def setUp(self): self.prefix = b'71' super().setUp() def ensure_valid(self, content): obj_id = hashutil.hashdata(content)['sha1'] hex_obj_id = hashutil.hash_to_hex(obj_id) self.assertTrue(hex_obj_id.startswith(self.prefix)) return content def ensure_invalid(self, content): obj_id = hashutil.hashdata(content)['sha1'] hex_obj_id = hashutil.hash_to_hex(obj_id) self.assertFalse(hex_obj_id.startswith(self.prefix)) return content - def filter_storage(self, storage): - return add_filter(storage, id_prefix(self.prefix)) + def filter_storage(self, sconf): + return get_objstorage('filtered', + {'storage_conf': sconf, + 'filters_conf': [id_prefix(self.prefix)]}) @attr('!db') class TestRegexFilter(MixinTestIdFilter, unittest.TestCase): def setUp(self): self.regex = r'[a-f][0-9].*' super().setUp() - def filter_storage(self, storage): - return add_filter(storage, id_regex(self.regex)) + def filter_storage(self, sconf): + return get_objstorage('filtered', + {'storage_conf': sconf, + 'filters_conf': [id_regex(self.regex)]}) diff --git a/swh/objstorage/tests/test_objstorage_api.py b/swh/objstorage/tests/test_objstorage_api.py index 6d27856..6d23451 100644 --- a/swh/objstorage/tests/test_objstorage_api.py +++ b/swh/objstorage/tests/test_objstorage_api.py @@ -1,28 +1,28 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tempfile import unittest from nose.plugins.attrib import attr +from swh.objstorage import get_objstorage from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture from swh.objstorage.tests.server_testing import ServerTestFixture -from swh.objstorage.api.client import RemoteObjStorage from swh.objstorage.api.server import app @attr('db') class TestRemoteObjStorage(ServerTestFixture, ObjStorageTestFixture, unittest.TestCase): """ Test the remote archive API. """ def setUp(self): self.config = {'storage_base': tempfile.mkdtemp(), 'storage_slicing': '0:1/0:5'} self.app = app super().setUp() - self.storage = RemoteObjStorage(self.url()) + self.storage = get_objstorage('remote', {'base_url': self.url()}) diff --git a/swh/objstorage/tests/test_objstorage_instantiation.py b/swh/objstorage/tests/test_objstorage_instantiation.py new file mode 100644 index 0000000..f512393 --- /dev/null +++ b/swh/objstorage/tests/test_objstorage_instantiation.py @@ -0,0 +1,47 @@ +# Copyright (C) 2015-2016 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import tempfile +import unittest + +from nose.tools import istest + +from swh.objstorage.tests.server_testing import ServerTestFixture +from swh.objstorage import get_objstorage +from swh.objstorage.objstorage_pathslicing import PathSlicingObjStorage +from swh.objstorage.api.client import RemoteObjStorage +from swh.objstorage.api.server import app + + +class TestObjStorageInitialization(ServerTestFixture, unittest.TestCase): + """ Test that the methods for ObjStorage initializations with + `get_objstorage` works properly. + """ + + def setUp(self): + self.path = tempfile.mkdtemp() + # Server is launched at self.url() + self.app = app + self.config = {'storage_base': tempfile.mkdtemp(), + 'storage_slicing': '0:1/0:5'} + super().setUp() + + @istest + def pathslicing_objstorage(self): + conf = { + 'cls': 'pathslicing', + 'args': {'root': self.path, 'slicing': '0:2/0:5'} + } + st = get_objstorage(**conf) + self.assertTrue(isinstance(st, PathSlicingObjStorage)) + + @istest + def remote_objstorage(self): + conf = { + 'cls': 'remote', + 'args': {'base_url': self.url()} + } + st = get_objstorage(**conf) + self.assertTrue(isinstance(st, RemoteObjStorage)) diff --git a/swh/objstorage/tests/test_objstorage_pathslicing.py b/swh/objstorage/tests/test_objstorage_pathslicing.py index 59b7de5..dfaf3d3 100644 --- a/swh/objstorage/tests/test_objstorage_pathslicing.py +++ b/swh/objstorage/tests/test_objstorage_pathslicing.py @@ -1,76 +1,79 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import tempfile import unittest from nose.tools import istest from swh.core import hashutil from swh.objstorage import exc -from swh.objstorage import PathSlicingObjStorage +from swh.objstorage import get_objstorage from objstorage_testing import ObjStorageTestFixture class TestpathSlicingObjStorage(ObjStorageTestFixture, unittest.TestCase): def setUp(self): super().setUp() self.slicing = '0:2/2:4/4:6' self.tmpdir = tempfile.mkdtemp() - self.storage = PathSlicingObjStorage(self.tmpdir, self.slicing) + self.storage = get_objstorage( + 'pathslicing', + {'root': self.tmpdir, 'slicing': self.slicing} + ) def content_path(self, obj_id): hex_obj_id = hashutil.hash_to_hex(obj_id) return self.storage._obj_path(hex_obj_id) @istest def contains(self): content_p, obj_id_p = self.hash_content(b'contains_present') content_m, obj_id_m = self.hash_content(b'contains_missing') self.storage.add(content_p, obj_id=obj_id_p) self.assertIn(obj_id_p, self.storage) self.assertNotIn(obj_id_m, self.storage) @istest def iter(self): content, obj_id = self.hash_content(b'iter') self.assertEqual(list(iter(self.storage)), []) self.storage.add(content, obj_id=obj_id) self.assertEqual(list(iter(self.storage)), [obj_id]) @istest def len(self): content, obj_id = self.hash_content(b'check_not_gzip') self.assertEqual(len(self.storage), 0) self.storage.add(content, obj_id=obj_id) self.assertEqual(len(self.storage), 1) @istest def check_not_gzip(self): content, obj_id = self.hash_content(b'check_not_gzip') self.storage.add(content, obj_id=obj_id) with open(self.content_path(obj_id), 'ab') as f: # Add garbage. f.write(b'garbage') with self.assertRaises(exc.Error): self.storage.check(obj_id) @istest def check_id_mismatch(self): content, obj_id = self.hash_content(b'check_id_mismatch') self.storage.add(content, obj_id=obj_id) with open(self.content_path(obj_id), 'wb') as f: f.write(b'unexpected content') with self.assertRaises(exc.Error): self.storage.check(obj_id) @istest def get_random_contents(self): content, obj_id = self.hash_content(b'get_random_content') self.storage.add(content, obj_id=obj_id) random_contents = list(self.storage.get_random(1)) self.assertEqual(1, len(random_contents)) self.assertIn(obj_id, random_contents)