D53.id188.diff (attached to D53: Implement the object storage multiplexer)
diff --git a/swh/storage/objstorage/multiplexer/__init__.py b/swh/storage/objstorage/multiplexer/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/__init__.py
@@ -0,0 +1,4 @@
+from .multiplexer_objstorage import MultiplexerObjStorage
+
+
+__all__ = ['MultiplexerObjStorage']
diff --git a/swh/storage/objstorage/multiplexer/filter/__init__.py b/swh/storage/objstorage/multiplexer/filter/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/__init__.py
@@ -0,0 +1,98 @@
+import functools
+
+from .read_write_filter import ReadObjStorageFilter
+from .id_filter import RegexIdObjStorageFilter, PrefixIdObjStorageFilter
+
+
+_FILTERS_CLASSES = {
+ 'readonly': ReadObjStorageFilter,
+ 'regex': RegexIdObjStorageFilter,
+ 'prefix': PrefixIdObjStorageFilter
+}
+
+
+_FILTERS_PRIORITY = {
+ 'readonly': 0,
+ 'prefix': 1,
+ 'regex': 2
+}
+
+
+def read_only():
+ return {'type': 'readonly'}
+
+
+def id_prefix(prefix):
+ return {'type': 'prefix', 'prefix': prefix}
+
+
+def id_regex(regex):
+ return {'type': 'regex', 'regex': regex}
+
+
+def _filter_priority(filter_type):
+    """ Get the priority of this filter.
+
+    The priority is a value that indicates whether the operation of the
+    filter is time-consuming (smaller values mean quicker execution), or
+    very likely to be almost always the same value (False being small,
+    and True high).
+
+    When filters are chained, they are ordered so that small priorities
+    (quick execution, or likely to break the chain immediately) are
+    executed first.
+
+    The default value is 1. Value 0 is recommended for filters that change
+    behavior only by disabling some operations (making the method return
+    None).
+    """
+    return _FILTERS_PRIORITY.get(filter_type, 1)
+
+
+def add_filter(storage, filter_conf):
+ """ Add a filter to the given storage.
+
+ Args:
+        storage (ObjStorage): the storage to be filtered.
+        filter_conf (dict): configuration of an ObjStorageFilter, given as
+            a dictionary that contains the keys:
+            - type: the type of filter, one of the keys of
+              _FILTERS_CLASSES;
+            - every argument that this type of filter requires.
+
+    Returns:
+        A filtered storage that performs only the allowed operations.
+ """
+    type = filter_conf['type']
+    args = {k: v for k, v in filter_conf.items() if k != 'type'}
+    filter = _FILTERS_CLASSES[type](storage=storage, **args)
+    return filter
+
+
+def add_filters(storage, *filter_confs):
+ """ Add multiple filters to the given storage.
+
+ (See filter.add_filter)
+
+ Args:
+        storage (ObjStorage): the storage to be filtered.
+        filter_confs (list): any number of filter configurations, each given
+            as a dict with:
+            - type: the type of filter, one of the keys of _FILTERS_CLASSES;
+            - every argument that this type of filter requires.
+
+    Returns:
+        A filtered storage that fulfills the requirements of all the given
+        filters.
+ """
+    # Reverse sorting puts the filter with the biggest priority first;
+    # sorted() is used because filter_confs is a tuple, not a list.
+    filter_confs = sorted(filter_confs, reverse=True,
+                          key=lambda conf: _filter_priority(conf['type']))
+
+    # Add the biggest-priority filter to the storage first, then reduce to
+    # accumulate the remaining filters on top of it, until the smallest
+    # (fastest, see _filter_priority) is added.
+ return functools.reduce(
+ lambda stor, conf: add_filter(stor, conf),
+ [storage] + filter_confs
+ )
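
A minimal usage sketch of this configuration API (not part of the diff; the backend class and slicing spec are borrowed from the tests further down, the temporary directory and content are made up):

    import tempfile

    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer.filter import (
        add_filters, id_prefix, read_only)

    # Any ObjStorage can serve as backend; PathSlicingObjStorage is what the
    # tests in this diff use.
    backend = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')

    # Chain two filters. add_filters() orders them by priority, so the cheap
    # read-only filter ends up outermost (evaluated first).
    storage = add_filters(backend, id_prefix('71'), read_only())

    storage.add(b'some content')  # returns None: writes are disabled
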
diff --git a/swh/storage/objstorage/multiplexer/filter/filter.py b/swh/storage/objstorage/multiplexer/filter/filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/filter.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from ...objstorage import ObjStorage
+
+
+class ObjStorageFilter(ObjStorage):
+    """ Base implementation of a filter that allows or blocks operations on
+    an ObjStorage.
+
+    This class copies the API of ...objstorage in order to filter the
+    operations applied to the wrapped storage. If an operation is allowed,
+    the result of that operation applied to the destination implementation
+    is returned. Otherwise, the call returns without performing any
+    operation.
+
+    This class is an abstract base class for a classic read/write storage.
+    Filters can inherit from it and only redefine some methods in order
+    to change their behavior.
+    """
+
+ def __init__(self, storage):
+ self.storage = storage
+
+ def __contains__(self, *args, **kwargs):
+ return self.storage.__contains__(*args, **kwargs)
+
+ def __iter__(self):
+ return self.storage.__iter__()
+
+ def __len__(self):
+ return self.storage.__len__()
+
+ def add(self, *args, **kwargs):
+ return self.storage.add(*args, **kwargs)
+
+ def restore(self, *args, **kwargs):
+ return self.storage.restore(*args, **kwargs)
+
+ def get(self, *args, **kwargs):
+ return self.storage.get(*args, **kwargs)
+
+ def check(self, *args, **kwargs):
+ return self.storage.check(*args, **kwargs)
+
+ def get_random(self, *args, **kwargs):
+ return self.storage.get_random(*args, **kwargs)
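
To illustrate the delegation pattern implemented by ObjStorageFilter, here is a hypothetical subclass (not part of this diff) that only overrides the write path; every other operation falls through to the wrapped storage via the base class:

    from swh.storage.objstorage.multiplexer.filter.filter import ObjStorageFilter

    class MaxSizeObjStorageFilter(ObjStorageFilter):
        """ Hypothetical filter refusing contents larger than max_size bytes. """

        def __init__(self, storage, max_size):
            super().__init__(storage)
            self.max_size = max_size

        def add(self, content, obj_id=None, check_presence=True):
            if len(content) > self.max_size:
                return None  # silently drop, like ReadObjStorageFilter does
            return self.storage.add(content, obj_id=obj_id,
                                    check_presence=check_presence)
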
diff --git a/swh/storage/objstorage/multiplexer/filter/id_filter.py b/swh/storage/objstorage/multiplexer/filter/id_filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/id_filter.py
@@ -0,0 +1,99 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+
+from swh.core import hashutil
+
+from .filter import ObjStorageFilter
+from ...objstorage import ID_HASH_ALGO
+from ....exc import ObjNotFoundError
+
+
+def compute_hash(content):
+    """ Compute the hash of the given content.
+    """
+    # Checksum is missing, compute it on the fly.
+    h = hashutil._new_hash(ID_HASH_ALGO, len(content))
+    h.update(content)
+    return h.digest()
+
+
+class IdObjStorageFilter(ObjStorageFilter):
+    """ Filter that only allows operations whose object id matches a
+    requirement.
+
+    Even for read operations, the id is checked first; this may prevent
+    unnecessary disk accesses.
+ """
+
+ def is_valid(self, obj_id):
+ """ Indicates if the given id is valid.
+ """
+        raise NotImplementedError('Implementations of an IdObjStorageFilter '
+                                  'must have an "is_valid" method')
+
+ def __contains__(self, obj_id, *args, **kwargs):
+ if self.is_valid(obj_id):
+ return self.storage.__contains__(*args, obj_id=obj_id, **kwargs)
+ return False
+
+ def __len__(self):
+        return sum(1 for id in self.storage if self.is_valid(id))
+
+ def __iter__(self):
+ yield from filter(lambda id: self.is_valid(id), iter(self.storage))
+
+ def add(self, content, obj_id=None, check_presence=True, *args, **kwargs):
+ if obj_id is None:
+ obj_id = compute_hash(content)
+ if self.is_valid(obj_id):
+ return self.storage.add(content, *args, obj_id=obj_id, **kwargs)
+
+ def restore(self, content, obj_id=None, *args, **kwargs):
+ if obj_id is None:
+ obj_id = compute_hash(content)
+ if self.is_valid(obj_id):
+ return self.storage.restore(content, *args,
+ obj_id=obj_id, **kwargs)
+
+ def get(self, obj_id, *args, **kwargs):
+ if self.is_valid(obj_id):
+ return self.storage.get(*args, obj_id=obj_id, **kwargs)
+ raise ObjNotFoundError(obj_id)
+
+ def check(self, obj_id, *args, **kwargs):
+ if self.is_valid(obj_id):
+ return self.storage.check(*args, obj_id=obj_id, **kwargs)
+ raise ObjNotFoundError(obj_id)
+
+ def get_random(self, *args, **kwargs):
+ yield from filter(lambda id: self.is_valid(id),
+ self.storage.get_random(*args, **kwargs))
+
+
+class RegexIdObjStorageFilter(IdObjStorageFilter):
+    """ Filter that allows operations if the content's id as hex matches a regex.
+ """
+
+ def __init__(self, storage, regex):
+ super().__init__(storage)
+ self.regex = re.compile(regex)
+
+ def is_valid(self, obj_id):
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ return self.regex.match(hex_obj_id) is not None
+
+
+class PrefixIdObjStorageFilter(IdObjStorageFilter):
+    """ Filter that allows operations if the hexlified id has a given prefix.
+ """
+
+ def __init__(self, storage, prefix):
+ super().__init__(storage)
+ self.prefix = str(prefix)
+
+ def is_valid(self, obj_id):
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ return str(hex_obj_id).startswith(self.prefix)
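
A small sketch of how the validity check behaves (not part of the diff; the backend, directory and example content are illustrative assumptions):

    import tempfile

    from swh.core import hashutil
    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer.filter.id_filter import (
        PrefixIdObjStorageFilter, RegexIdObjStorageFilter)

    backend = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
    prefix_filter = PrefixIdObjStorageFilter(backend, '71')
    regex_filter = RegexIdObjStorageFilter(backend, r'[0-9a-f]{2}7.*')

    obj_id = hashutil.hashdata(b'example content')['sha1']
    prefix_filter.is_valid(obj_id)  # True iff the hex id starts with '71'
    regex_filter.is_valid(obj_id)   # True iff the hex id matches the regex
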
diff --git a/swh/storage/objstorage/multiplexer/filter/read_write_filter.py b/swh/storage/objstorage/multiplexer/filter/read_write_filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/read_write_filter.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from .filter import ObjStorageFilter
+
+
+class ReadObjStorageFilter(ObjStorageFilter):
+    """ Filter that disables the write operations of the storage.
+ """
+
+ def add(self, *args, **kwargs):
+ return
+
+ def restore(self, *args, **kwargs):
+ return
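
A short usage sketch (illustrative assumptions only, including the backend's add() signature): wrap an already populated backend so callers can read from it but never modify it.

    import tempfile

    from swh.core import hashutil
    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer.filter import add_filter, read_only

    backend = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
    obj_id = hashutil.hashdata(b'archived content')['sha1']
    backend.add(b'archived content', obj_id=obj_id)

    ro_storage = add_filter(backend, read_only())
    ro_storage.get(obj_id)           # reads are delegated to the backend
    ro_storage.add(b'new content')   # returns None, nothing is written
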
diff --git a/swh/storage/objstorage/multiplexer/multiplexer_objstorage.py b/swh/storage/objstorage/multiplexer/multiplexer_objstorage.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/multiplexer_objstorage.py
@@ -0,0 +1,166 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+
+from ..objstorage import ObjStorage
+from ...exc import ObjNotFoundError
+
+
+class MultiplexerObjStorage(ObjStorage):
+    """ Implementation of ObjStorage that distributes objects over multiple
+    storages.
+
+    The multiplexer object storage dispatches each input to multiple
+    storages, each of which may or may not accept it on its own
+    (see the .filter package).
+
+    As the ids can differ, no pre-computed ids should be submitted.
+    Also, there is no guarantee that the returned ids can be used directly
+    in the storages that the multiplexer manages.
+    """
+
+ def __init__(self, storages):
+ self.storages = storages
+
+ def __contains__(self, obj_id):
+ for storage in self.storages:
+ if obj_id in storage:
+ return True
+ return False
+
+ def __iter__(self):
+ for storage in self.storages:
+ yield from storage
+
+ def __len__(self):
+ """ Returns the number of files in the storage.
+
+ Warning: Multiple files may represent the same content, so this method
+ does not indicate how many different contents are in the storage.
+ """
+ return sum(map(len, self.storages))
+
+ def add(self, content, obj_id=None, check_presence=True):
+ """ Add a new object to the object storage.
+
+        If the adding step works in all the storages that accept this
+        content, it is a success. Otherwise, the full adding step is an
+        error even if it succeeds in some of the storages.
+
+ Args:
+ content: content of the object to be added to the storage.
+            obj_id: checksum of `content` computed with the ID_HASH_ALGO
+                algorithm. When given, obj_id will be trusted to match the
+                content. If missing, obj_id will be computed on the fly.
+ check_presence: indicate if the presence of the content should be
+ verified before adding the file.
+
+ Returns:
+            an id of the object in the storage. As the write storages are
+            always readable as well, any returned id is valid for retrieving
+            the content.
+ """
+ return [storage.add(content, obj_id, check_presence)
+ for storage in self.storages].pop()
+
+ def restore(self, content, obj_id=None):
+        """ Restore a content that has been corrupted.
+
+        This function is identical to add() but does not check whether
+        the object id is already present in the storage.
+
+ (see "add" method)
+
+ Args:
+ content: content of the object to be added to the storage
+ obj_id: checksums of `bytes` as computed by ID_HASH_ALGO. When
+ given, obj_id will be trusted to match bytes. If missing,
+ obj_id will be computed on the fly.
+
+ Returns:
+            an id of the object in the storage. As the write storages are
+            always readable as well, any returned id is valid for retrieving
+            the content.
+ """
+ return [storage.restore(content, obj_id)
+ for storage in self.storages].pop()
+
+ def get(self, obj_id):
+ """ Retrieve the content of a given object.
+
+ Args:
+ obj_id: object id.
+
+ Returns:
+ the content of the requested object as bytes.
+
+ Raises:
+ ObjNotFoundError: if the requested object is missing.
+ """
+ for storage in self.storages:
+ try:
+ return storage.get(obj_id)
+ except ObjNotFoundError:
+ continue
+ # If no storage contains this content, raise the error
+ raise ObjNotFoundError(obj_id)
+
+ def check(self, obj_id):
+ """ Perform an integrity check for a given object.
+
+        Verify that the file object is in place and that the gzipped content
+ matches the object id.
+
+ Args:
+ obj_id: object id.
+
+ Raises:
+ ObjNotFoundError: if the requested object is missing.
+ Error: if the request object is corrupted.
+ """
+ nb_present = 0
+ for storage in self.storages:
+ try:
+ storage.check(obj_id)
+ except ObjNotFoundError:
+ continue
+ else:
+ nb_present += 1
+ # If there is an Error because of a corrupted file, then let it pass.
+
+        # Raise ObjNotFoundError only if the content couldn't be found in
+        # any of the storages.
+ if nb_present == 0:
+ raise ObjNotFoundError(obj_id)
+
+ def get_random(self, batch_size):
+ """ Get random ids of existing contents
+
+ This method is used in order to get random ids to perform
+ content integrity verifications on random contents.
+
+        Args:
+            batch_size (int): number of ids that will be returned.
+
+        Returns:
+            An iterable of ids of contents that are in the current object
+            storage.
+ """
+ storages_set = [storage for storage in self.storages
+ if len(storage) > 0]
+ if len(storages_set) <= 0:
+ return []
+
+ while storages_set:
+ storage = random.choice(storages_set)
+ try:
+ return storage.get_random(batch_size)
+ except NotImplementedError:
+ storages_set.remove(storage)
+        # No storage allows the get_random operation.
+        raise NotImplementedError(
+            "There is no storage implementation in the multiplexer that "
+            "supports the 'get_random' operation"
+ )
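
A sketch of the composition this diff is aiming for, mirroring the test setup further down (directories and slicing specs are illustrative; it assumes, as documented above, that add() returns an object id): one read-only legacy storage and one writable storage behind a single multiplexer.

    import tempfile

    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer import MultiplexerObjStorage
    from swh.storage.objstorage.multiplexer.filter import add_filter, read_only

    legacy = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
    current = PathSlicingObjStorage(tempfile.mkdtemp(), '0:1/0:5')

    storage = MultiplexerObjStorage([add_filter(legacy, read_only()),
                                     current])

    obj_id = storage.add(b'new content')   # written to `current` only
    content = storage.get(obj_id)          # reads try each storage in turn
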
diff --git a/swh/storage/tests/test_multiplexer_filter.py b/swh/storage/tests/test_multiplexer_filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/test_multiplexer_filter.py
@@ -0,0 +1,373 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import unittest
+
+from string import ascii_lowercase
+
+from nose.tools import istest
+from nose.plugins.attrib import attr
+
+from swh.core import hashutil
+from swh.storage.exc import ObjNotFoundError, Error
+from swh.storage.objstorage import ObjStorage
+from swh.storage.objstorage.multiplexer.filter import (add_filter, read_only,
+ id_prefix, id_regex)
+
+
+def get_random_content():
+ return bytes(''.join(random.sample(ascii_lowercase, 10)), 'utf8')
+
+
+class MockObjStorage(ObjStorage):
+ """ Mock an object storage for testing the filters.
+ """
+ def __init__(self):
+ self.objects = {}
+
+ def __contains__(self, obj_id):
+ return obj_id in self.objects
+
+ def __len__(self):
+ return len(self.objects)
+
+ def __iter__(self):
+ return iter(self.objects)
+
+ def id(self, content):
+        # The id is the sha1 of the content, which makes it easy to craft
+        # contents whose ids are (or are not) accepted by an id filter.
+ return hashutil.hashdata(content)['sha1']
+
+ def add(self, content, obj_id=None, check_presence=True):
+ if obj_id is None:
+ obj_id = self.id(content)
+
+ if check_presence and obj_id in self.objects:
+ return obj_id
+
+ self.objects[obj_id] = content
+ return obj_id
+
+ def restore(self, content, obj_id=None):
+ return self.add(content, obj_id, check_presence=False)
+
+ def get(self, obj_id):
+ if obj_id not in self:
+ raise ObjNotFoundError(obj_id)
+ return self.objects[obj_id]
+
+ def check(self, obj_id):
+ if obj_id not in self:
+ raise ObjNotFoundError(obj_id)
+ if obj_id != self.id(self.objects[obj_id]):
+ raise Error(obj_id)
+
+ def get_random(self, batch_size):
+ batch_size = min(len(self), batch_size)
+ return random.sample(list(self.objects), batch_size)
+
+
+@attr('!db')
+class MixinTestReadFilter(unittest.TestCase):
+ # Read only filter should not allow writing
+
+ def setUp(self):
+ super().setUp()
+ storage = MockObjStorage()
+
+ self.valid_content = b'pre-existing content'
+ self.invalid_content = b'invalid_content'
+ self.true_invalid_content = b'Anything that is not correct'
+ self.absent_content = b'non-existent content'
+ # Create a valid content.
+ self.valid_id = storage.add(self.valid_content)
+ # Create an invalid id and add a content with it.
+ self.invalid_id = storage.id(self.true_invalid_content)
+ storage.add(self.invalid_content, obj_id=self.invalid_id)
+ # Compute an id for a non-existing content.
+ self.absent_id = storage.id(self.absent_content)
+
+ self.storage = add_filter(storage, read_only())
+
+ @istest
+ def can_contains(self):
+ self.assertTrue(self.valid_id in self.storage)
+ self.assertTrue(self.invalid_id in self.storage)
+ self.assertFalse(self.absent_id in self.storage)
+
+ @istest
+ def can_iter(self):
+ self.assertIn(self.valid_id, iter(self.storage))
+ self.assertIn(self.invalid_id, iter(self.storage))
+
+ @istest
+ def can_len(self):
+ self.assertEqual(2, len(self.storage))
+
+ @istest
+ def can_get(self):
+ self.assertEqual(self.valid_content, self.storage.get(self.valid_id))
+ self.assertEqual(self.invalid_content,
+ self.storage.get(self.invalid_id))
+
+ @istest
+ def can_check(self):
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.absent_id)
+ with self.assertRaises(Error):
+ self.storage.check(self.invalid_id)
+ self.storage.check(self.valid_id)
+
+ @istest
+ def can_get_random(self):
+ self.assertEqual(1, len(self.storage.get_random(1)))
+ self.assertEqual(len(self.storage), len(self.storage.get_random(1000)))
+
+ @istest
+ def cannot_add(self):
+ new_id = self.storage.add(b'New content')
+ result = self.storage.add(self.valid_content, self.valid_id)
+ self.assertNotIn(new_id, self.storage)
+ self.assertIsNone(result)
+
+ @istest
+ def cannot_restore(self):
+ new_id = self.storage.restore(b'New content')
+ result = self.storage.restore(self.valid_content, self.valid_id)
+ self.assertNotIn(new_id, self.storage)
+ self.assertIsNone(result)
+
+
+class MixinTestIdFilter():
+    """ Mixin class that tests the filters based on IdObjStorageFilter.
+
+    The "filter_storage" method must be implemented by subclasses, which
+    may also override "ensure_valid" and "ensure_invalid".
+    """
+
+ def setUp(self):
+ super().setUp()
+        # Use a hack here: as the mock derives the id directly from the
+        # content, it is easy to create contents that are filtered or not.
+ self.prefix = '71'
+ storage = MockObjStorage()
+
+ # Make the storage filtered
+ self.base_storage = storage
+ self.storage = self.filter_storage(storage)
+
+ # Present content with valid id
+ self.present_valid_content = self.ensure_valid(b'yroqdtotji')
+ self.present_valid_id = storage.id(self.present_valid_content)
+
+ # Present content with invalid id
+ self.present_invalid_content = self.ensure_invalid(b'glxddlmmzb')
+ self.present_invalid_id = storage.id(self.present_invalid_content)
+
+ # Missing content with valid id
+ self.missing_valid_content = self.ensure_valid(b'rmzkdclkez')
+ self.missing_valid_id = storage.id(self.missing_valid_content)
+
+ # Missing content with invalid id
+ self.missing_invalid_content = self.ensure_invalid(b'hlejfuginh')
+ self.missing_invalid_id = storage.id(self.missing_invalid_content)
+
+ # Present corrupted content with valid id
+ self.present_corrupted_valid_content = self.ensure_valid(b'cdsjwnpaij')
+ self.true_present_corrupted_valid_content = self.ensure_valid(
+ b'mgsdpawcrr')
+ self.present_corrupted_valid_id = storage.id(
+ self.true_present_corrupted_valid_content)
+
+ # Present corrupted content with invalid id
+ self.present_corrupted_invalid_content = self.ensure_invalid(
+ b'pspjljnrco')
+ self.true_present_corrupted_invalid_content = self.ensure_invalid(
+ b'rjocbnnbso')
+ self.present_corrupted_invalid_id = storage.id(
+ self.true_present_corrupted_invalid_content)
+
+ # Missing (potentially) corrupted content with valid id
+ self.missing_corrupted_valid_content = self.ensure_valid(
+ b'zxkokfgtou')
+ self.true_missing_corrupted_valid_content = self.ensure_valid(
+ b'royoncooqa')
+ self.missing_corrupted_valid_id = storage.id(
+ self.true_missing_corrupted_valid_content)
+
+ # Missing (potentially) corrupted content with invalid id
+ self.missing_corrupted_invalid_content = self.ensure_invalid(
+ b'hxaxnrmnyk')
+ self.true_missing_corrupted_invalid_content = self.ensure_invalid(
+ b'qhbolyuifr')
+ self.missing_corrupted_invalid_id = storage.id(
+ self.true_missing_corrupted_invalid_content)
+
+ # Add the content that are supposed to be present
+ storage.add(self.present_valid_content)
+ storage.add(self.present_invalid_content)
+ storage.add(self.present_corrupted_valid_content,
+ obj_id=self.present_corrupted_valid_id)
+ storage.add(self.present_corrupted_invalid_content,
+ obj_id=self.present_corrupted_invalid_id)
+
+ def filter_storage(self, storage):
+ raise NotImplementedError(
+ 'Id_filter test class must have a filter_storage method')
+
+ def ensure_valid(self, content=None):
+ if content is None:
+ content = get_random_content()
+ while not self.storage.is_valid(self.base_storage.id(content)):
+ content = get_random_content()
+ return content
+
+ def ensure_invalid(self, content=None):
+ if content is None:
+ content = get_random_content()
+ while self.storage.is_valid(self.base_storage.id(content)):
+ content = get_random_content()
+ return content
+
+ @istest
+ def contains(self):
+ # Both contents are present, but the invalid one should be ignored.
+ self.assertTrue(self.present_valid_id in self.storage)
+ self.assertFalse(self.present_invalid_id in self.storage)
+ self.assertFalse(self.missing_valid_id in self.storage)
+ self.assertFalse(self.missing_invalid_id in self.storage)
+ self.assertTrue(self.present_corrupted_valid_id in self.storage)
+ self.assertFalse(self.present_corrupted_invalid_id in self.storage)
+ self.assertFalse(self.missing_corrupted_valid_id in self.storage)
+ self.assertFalse(self.missing_corrupted_invalid_id in self.storage)
+
+ @istest
+ def iter(self):
+ self.assertIn(self.present_valid_id, iter(self.storage))
+ self.assertNotIn(self.present_invalid_id, iter(self.storage))
+ self.assertNotIn(self.missing_valid_id, iter(self.storage))
+ self.assertNotIn(self.missing_invalid_id, iter(self.storage))
+ self.assertIn(self.present_corrupted_valid_id, iter(self.storage))
+ self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage))
+ self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage))
+ self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage))
+
+ @istest
+ def len(self):
+ # Four contents are present, but only two should be valid.
+ self.assertEqual(2, len(self.storage))
+
+ @istest
+ def get(self):
+ self.assertEqual(self.present_valid_content,
+ self.storage.get(self.present_valid_id))
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.present_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_invalid_id)
+ self.assertEqual(self.present_corrupted_valid_content,
+ self.storage.get(self.present_corrupted_valid_id))
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.present_corrupted_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_corrupted_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_corrupted_invalid_id)
+
+ @istest
+ def check(self):
+ self.storage.check(self.present_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.present_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_invalid_id)
+ with self.assertRaises(Error):
+ self.storage.check(self.present_corrupted_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.present_corrupted_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_corrupted_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_corrupted_invalid_id)
+
+ @istest
+ def get_random(self):
+ self.assertEqual(0, len(list(self.storage.get_random(0))))
+
+ random_content = list(self.storage.get_random(1000))
+ self.assertIn(self.present_valid_id, random_content)
+ self.assertNotIn(self.present_invalid_id, random_content)
+ self.assertNotIn(self.missing_valid_id, random_content)
+ self.assertNotIn(self.missing_invalid_id, random_content)
+ self.assertIn(self.present_corrupted_valid_id, random_content)
+ self.assertNotIn(self.present_corrupted_invalid_id, random_content)
+ self.assertNotIn(self.missing_corrupted_valid_id, random_content)
+ self.assertNotIn(self.missing_corrupted_invalid_id, random_content)
+
+ @istest
+ def add(self):
+ # Add valid and invalid contents to the storage and check their
+ # presence with the unfiltered storage.
+ valid_content = self.ensure_valid(b'ulepsrjbgt')
+ valid_id = self.base_storage.id(valid_content)
+ invalid_content = self.ensure_invalid(b'znvghkjked')
+ invalid_id = self.base_storage.id(invalid_content)
+ self.storage.add(valid_content)
+ self.storage.add(invalid_content)
+ self.assertTrue(valid_id in self.base_storage)
+ self.assertFalse(invalid_id in self.base_storage)
+
+ @istest
+ def restore(self):
+        # Add corrupted content to the storage and then try to restore it
+ valid_content = self.ensure_valid(b'ulepsrjbgt')
+ valid_id = self.base_storage.id(valid_content)
+ corrupted_content = self.ensure_valid(b'ltjkjsloyb')
+ corrupted_id = self.base_storage.id(corrupted_content)
+ self.storage.add(corrupted_content, obj_id=valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(corrupted_id)
+ with self.assertRaises(Error):
+ self.storage.check(valid_id)
+ self.storage.restore(valid_content)
+ self.storage.check(valid_id)
+
+
+@attr('!db')
+class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase):
+ def setUp(self):
+        self.prefix = '71'
+ super().setUp()
+
+ def ensure_valid(self, content):
+ obj_id = hashutil.hashdata(content)['sha1']
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ self.assertTrue(hex_obj_id.startswith(self.prefix))
+ return content
+
+ def ensure_invalid(self, content):
+ obj_id = hashutil.hashdata(content)['sha1']
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ self.assertFalse(hex_obj_id.startswith(self.prefix))
+ return content
+
+ def filter_storage(self, storage):
+ return add_filter(storage, id_prefix(self.prefix))
+
+
+@attr('!db')
+class TestRegexFilter(MixinTestIdFilter, unittest.TestCase):
+ def setUp(self):
+ self.regex = r'[a-f][0-9].*'
+ super().setUp()
+
+ def filter_storage(self, storage):
+ return add_filter(storage, id_regex(self.regex))
diff --git a/swh/storage/tests/test_objstorage_multiplexer.py b/swh/storage/tests/test_objstorage_multiplexer.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/test_objstorage_multiplexer.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import tempfile
+import unittest
+
+from nose.tools import istest
+
+from swh.storage.objstorage import PathSlicingObjStorage
+from swh.storage.objstorage.multiplexer import MultiplexerObjStorage
+from swh.storage.objstorage.multiplexer.filter import add_filter, read_only
+
+from objstorage_testing import ObjStorageTestFixture
+
+
+class TestMultiplexerObjStorage(ObjStorageTestFixture, unittest.TestCase):
+
+ def setUp(self):
+ super().setUp()
+ self.storage_v1 = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
+ self.storage_v2 = PathSlicingObjStorage(tempfile.mkdtemp(), '0:1/0:5')
+
+ self.r_storage = add_filter(self.storage_v1, read_only())
+ self.w_storage = self.storage_v2
+ self.storage = MultiplexerObjStorage([self.r_storage, self.w_storage])
+
+ @istest
+ def contains(self):
+ content_p, obj_id_p = self.hash_content(b'contains_present')
+ content_m, obj_id_m = self.hash_content(b'contains_missing')
+ self.storage.add(content_p, obj_id=obj_id_p)
+ self.assertIn(obj_id_p, self.storage)
+ self.assertNotIn(obj_id_m, self.storage)
+
+ @istest
+ def iter(self):
+ content, obj_id = self.hash_content(b'iter')
+ self.assertEqual(list(iter(self.storage)), [])
+ self.storage.add(content, obj_id=obj_id)
+ self.assertEqual(list(iter(self.storage)), [obj_id])
+
+ @istest
+ def len(self):
+ content, obj_id = self.hash_content(b'len')
+ self.assertEqual(len(self.storage), 0)
+ self.storage.add(content, obj_id=obj_id)
+ self.assertEqual(len(self.storage), 1)
+
+ @istest
+ def len_multiple(self):
+ content, obj_id = self.hash_content(b'len_multiple')
+ # Add a content to the read-only storage
+ self.storage_v1.add(content)
+ self.assertEqual(len(self.storage), 1)
+        # By adding the same content to the global storage, it should be
+        # replicated: len() indicates the number of files, not the number
+        # of unique contents.
+ self.storage.add(content)
+ self.assertEqual(len(self.storage), 2)
+
+ @istest
+ def get_random_contents(self):
+ content, obj_id = self.hash_content(b'get_random_content')
+ self.storage.add(content)
+ random_contents = list(self.storage.get_random(1))
+ self.assertEqual(1, len(random_contents))
+ self.assertIn(obj_id, random_contents)
+
+ @istest
+ def access_readonly(self):
+ # Add a content to the readonly storage
+ content, obj_id = self.hash_content(b'content in read-only')
+ self.storage_v1.add(content)
+ # Try to retrieve it on the main storage
+ self.assertIn(obj_id, self.storage)