D53.id188.diff (attached to D53: Implement the object storage multiplexer)
diff --git a/swh/storage/objstorage/multiplexer/__init__.py b/swh/storage/objstorage/multiplexer/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/__init__.py
@@ -0,0 +1,4 @@
+from .multiplexer_objstorage import MultiplexerObjStorage
+
+
+__all__ = ['MultiplexerObjStorage']
diff --git a/swh/storage/objstorage/multiplexer/filter/__init__.py b/swh/storage/objstorage/multiplexer/filter/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/__init__.py
@@ -0,0 +1,98 @@
+import functools
+
+from .read_write_filter import ReadObjStorageFilter
+from .id_filter import RegexIdObjStorageFilter, PrefixIdObjStorageFilter
+
+
+_FILTERS_CLASSES = {
+ 'readonly': ReadObjStorageFilter,
+ 'regex': RegexIdObjStorageFilter,
+ 'prefix': PrefixIdObjStorageFilter
+}
+
+
+_FILTERS_PRIORITY = {
+ 'readonly': 0,
+ 'prefix': 1,
+ 'regex': 2
+}
+
+
+def read_only():
+ return {'type': 'readonly'}
+
+
+def id_prefix(prefix):
+ return {'type': 'prefix', 'prefix': prefix}
+
+
+def id_regex(regex):
+ return {'type': 'regex', 'regex': regex}
+
+
+def _filter_priority(filter_type):
+    """ Get the priority of this filter.
+
+    The priority is a value that indicates whether the operation of the
+    filter is time-consuming (smaller values mean quicker execution), or
+    very likely to be almost always the same value (False being small,
+    and True high).
+
+    When filters are chained, they are ordered so that small priorities
+    (quick execution, or likely to break the chain immediately) are
+    executed first.
+
+    The default value is 1. Value 0 is recommended for filters that change
+    behavior only by disabling some operations (making the method return
+    None).
+    """
+    return _FILTERS_PRIORITY.get(filter_type, 1)
+
+
+def add_filter(storage, filter_conf):
+ """ Add a filter to the given storage.
+
+ Args:
+        storage (ObjStorage): the storage to be filtered.
+        filter_conf (dict): configuration of an ObjStorageFilter, given as
+            a dictionary that contains the keys:
+            - type: the type of filter, one of the keys of
+              _FILTERS_CLASSES;
+            - every argument that this type of filter requires.
+
+    Returns:
+        A filtered storage that performs only the allowed operations.
+ """
+    type = filter_conf['type']
+    args = {k: v for k, v in filter_conf.items() if k != 'type'}
+    filter = _FILTERS_CLASSES[type](storage=storage, **args)
+    return filter
+
+
+def add_filters(storage, *filter_confs):
+ """ Add multiple filters to the given storage.
+
+ (See filter.add_filter)
+
+ Args:
+        storage (ObjStorage): the storage to be filtered.
+        filter_confs (list): any number of filter configurations, each given
+            as a dict with:
+            - type: the type of filter, one of the keys of _FILTERS_CLASSES;
+            - every argument that this type of filter requires.
+
+    Returns:
+        A filtered storage that fulfills the requirements of all the given
+        filters.
+ """
+    # Reverse sorting puts the filter with the biggest priority first;
+    # sorted() is used because filter_confs is a tuple, not a list.
+    filter_confs = sorted(filter_confs, reverse=True,
+                          key=lambda conf: _filter_priority(conf['type']))
+
+    # Add the biggest-priority filter to the storage first, then reduce to
+    # accumulate the remaining filters on top of it, until the smallest
+    # (fastest, see _filter_priority) is added.
+ return functools.reduce(
+ lambda stor, conf: add_filter(stor, conf),
+ [storage] + filter_confs
+ )
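
A minimal usage sketch of this configuration API (not part of the diff; the backend class and slicing spec are borrowed from the tests further down, the temporary directory and content are made up):

    import tempfile

    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer.filter import (
        add_filters, id_prefix, read_only)

    # Any ObjStorage can serve as backend; PathSlicingObjStorage is what the
    # tests in this diff use.
    backend = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')

    # Chain two filters. add_filters() orders them by priority, so the cheap
    # read-only filter ends up outermost (evaluated first).
    storage = add_filters(backend, id_prefix('71'), read_only())

    storage.add(b'some content')  # returns None: writes are disabled
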
diff --git a/swh/storage/objstorage/multiplexer/filter/filter.py b/swh/storage/objstorage/multiplexer/filter/filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/filter.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from ...objstorage import ObjStorage
+
+
+class ObjStorageFilter(ObjStorage):
+    """ Base implementation of a filter that allows or blocks operations on
+    an ObjStorage.
+
+    This class copies the API of ...objstorage in order to filter the
+    operations applied to the wrapped storage. If an operation is allowed,
+    the result of that operation applied to the destination implementation
+    is returned. Otherwise, the call returns without performing any
+    operation.
+
+    This class is an abstract base class for a classic read/write storage.
+    Filters can inherit from it and only redefine some methods in order
+    to change their behavior.
+    """
+
+ def __init__(self, storage):
+ self.storage = storage
+
+ def __contains__(self, *args, **kwargs):
+ return self.storage.__contains__(*args, **kwargs)
+
+ def __iter__(self):
+ return self.storage.__iter__()
+
+ def __len__(self):
+ return self.storage.__len__()
+
+ def add(self, *args, **kwargs):
+ return self.storage.add(*args, **kwargs)
+
+ def restore(self, *args, **kwargs):
+ return self.storage.restore(*args, **kwargs)
+
+ def get(self, *args, **kwargs):
+ return self.storage.get(*args, **kwargs)
+
+ def check(self, *args, **kwargs):
+ return self.storage.check(*args, **kwargs)
+
+ def get_random(self, *args, **kwargs):
+ return self.storage.get_random(*args, **kwargs)
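
To illustrate the delegation pattern implemented by ObjStorageFilter, here is a hypothetical subclass (not part of this diff) that only overrides the write path; every other operation falls through to the wrapped storage via the base class:

    from swh.storage.objstorage.multiplexer.filter.filter import ObjStorageFilter

    class MaxSizeObjStorageFilter(ObjStorageFilter):
        """ Hypothetical filter refusing contents larger than max_size bytes. """

        def __init__(self, storage, max_size):
            super().__init__(storage)
            self.max_size = max_size

        def add(self, content, obj_id=None, check_presence=True):
            if len(content) > self.max_size:
                return None  # silently drop, like ReadObjStorageFilter does
            return self.storage.add(content, obj_id=obj_id,
                                    check_presence=check_presence)
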
diff --git a/swh/storage/objstorage/multiplexer/filter/id_filter.py b/swh/storage/objstorage/multiplexer/filter/id_filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/id_filter.py
@@ -0,0 +1,99 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+
+from swh.core import hashutil
+
+from .filter import ObjStorageFilter
+from ...objstorage import ID_HASH_ALGO
+from ....exc import ObjNotFoundError
+
+
+def compute_hash(content):
+    """ Compute the hash of the given content.
+    """
+    # Checksum is missing, compute it on the fly.
+    h = hashutil._new_hash(ID_HASH_ALGO, len(content))
+    h.update(content)
+    return h.digest()
+
+
+class IdObjStorageFilter(ObjStorageFilter):
+    """ Filter that only allows operations whose object id matches a
+    requirement.
+
+    Even for read operations, the id is checked first; this may prevent
+    unnecessary disk accesses.
+ """
+
+ def is_valid(self, obj_id):
+ """ Indicates if the given id is valid.
+ """
+        raise NotImplementedError('Implementations of an IdObjStorageFilter '
+                                  'must have an "is_valid" method')
+
+ def __contains__(self, obj_id, *args, **kwargs):
+ if self.is_valid(obj_id):
+ return self.storage.__contains__(*args, obj_id=obj_id, **kwargs)
+ return False
+
+ def __len__(self):
+        return sum(1 for id in self.storage if self.is_valid(id))
+
+ def __iter__(self):
+ yield from filter(lambda id: self.is_valid(id), iter(self.storage))
+
+ def add(self, content, obj_id=None, check_presence=True, *args, **kwargs):
+ if obj_id is None:
+ obj_id = compute_hash(content)
+ if self.is_valid(obj_id):
+ return self.storage.add(content, *args, obj_id=obj_id, **kwargs)
+
+ def restore(self, content, obj_id=None, *args, **kwargs):
+ if obj_id is None:
+ obj_id = compute_hash(content)
+ if self.is_valid(obj_id):
+ return self.storage.restore(content, *args,
+ obj_id=obj_id, **kwargs)
+
+ def get(self, obj_id, *args, **kwargs):
+ if self.is_valid(obj_id):
+ return self.storage.get(*args, obj_id=obj_id, **kwargs)
+ raise ObjNotFoundError(obj_id)
+
+ def check(self, obj_id, *args, **kwargs):
+ if self.is_valid(obj_id):
+ return self.storage.check(*args, obj_id=obj_id, **kwargs)
+ raise ObjNotFoundError(obj_id)
+
+ def get_random(self, *args, **kwargs):
+ yield from filter(lambda id: self.is_valid(id),
+ self.storage.get_random(*args, **kwargs))
+
+
+class RegexIdObjStorageFilter(IdObjStorageFilter):
+    """ Filter that allows operations if the content's id as hex matches a regex.
+ """
+
+ def __init__(self, storage, regex):
+ super().__init__(storage)
+ self.regex = re.compile(regex)
+
+ def is_valid(self, obj_id):
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ return self.regex.match(hex_obj_id) is not None
+
+
+class PrefixIdObjStorageFilter(IdObjStorageFilter):
+    """ Filter that allows operations if the hexlified id has a given prefix.
+ """
+
+ def __init__(self, storage, prefix):
+ super().__init__(storage)
+ self.prefix = str(prefix)
+
+ def is_valid(self, obj_id):
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ return str(hex_obj_id).startswith(self.prefix)
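
A small sketch of how the validity check behaves (not part of the diff; the backend, directory and example content are illustrative assumptions):

    import tempfile

    from swh.core import hashutil
    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer.filter.id_filter import (
        PrefixIdObjStorageFilter, RegexIdObjStorageFilter)

    backend = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
    prefix_filter = PrefixIdObjStorageFilter(backend, '71')
    regex_filter = RegexIdObjStorageFilter(backend, r'[0-9a-f]{2}7.*')

    obj_id = hashutil.hashdata(b'example content')['sha1']
    prefix_filter.is_valid(obj_id)  # True iff the hex id starts with '71'
    regex_filter.is_valid(obj_id)   # True iff the hex id matches the regex
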
diff --git a/swh/storage/objstorage/multiplexer/filter/read_write_filter.py b/swh/storage/objstorage/multiplexer/filter/read_write_filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/read_write_filter.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from .filter import ObjStorageFilter
+
+
+class ReadObjStorageFilter(ObjStorageFilter):
+    """ Filter that disables the write operations of the storage.
+ """
+
+ def add(self, *args, **kwargs):
+ return
+
+ def restore(self, *args, **kwargs):
+ return
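
A short usage sketch (illustrative assumptions only, including the backend's add() signature): wrap an already populated backend so callers can read from it but never modify it.

    import tempfile

    from swh.core import hashutil
    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer.filter import add_filter, read_only

    backend = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
    obj_id = hashutil.hashdata(b'archived content')['sha1']
    backend.add(b'archived content', obj_id=obj_id)

    ro_storage = add_filter(backend, read_only())
    ro_storage.get(obj_id)           # reads are delegated to the backend
    ro_storage.add(b'new content')   # returns None, nothing is written
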
diff --git a/swh/storage/objstorage/multiplexer/multiplexer_objstorage.py b/swh/storage/objstorage/multiplexer/multiplexer_objstorage.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/multiplexer_objstorage.py
@@ -0,0 +1,166 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+
+from ..objstorage import ObjStorage
+from ...exc import ObjNotFoundError
+
+
+class MultiplexerObjStorage(ObjStorage):
+    """ Implementation of ObjStorage that distributes objects over multiple
+    storages.
+
+    The multiplexer object storage dispatches each input to multiple
+    storages, each of which may or may not accept it on its own
+    (see the .filter package).
+
+    As the ids can differ, no pre-computed ids should be submitted.
+    Also, there is no guarantee that the returned ids can be used directly
+    in the storages that the multiplexer manages.
+    """
+
+ def __init__(self, storages):
+ self.storages = storages
+
+ def __contains__(self, obj_id):
+ for storage in self.storages:
+ if obj_id in storage:
+ return True
+ return False
+
+ def __iter__(self):
+ for storage in self.storages:
+ yield from storage
+
+ def __len__(self):
+ """ Returns the number of files in the storage.
+
+ Warning: Multiple files may represent the same content, so this method
+ does not indicate how many different contents are in the storage.
+ """
+ return sum(map(len, self.storages))
+
+ def add(self, content, obj_id=None, check_presence=True):
+ """ Add a new object to the object storage.
+
+        If the adding step works in all the storages that accept this
+        content, it is a success. Otherwise, the full adding step is an
+        error even if it succeeds in some of the storages.
+
+ Args:
+ content: content of the object to be added to the storage.
+            obj_id: checksum of `content` computed with the ID_HASH_ALGO
+                algorithm. When given, obj_id will be trusted to match the
+                content. If missing, obj_id will be computed on the fly.
+ check_presence: indicate if the presence of the content should be
+ verified before adding the file.
+
+ Returns:
+            an id of the object in the storage. As the write storages are
+            always readable as well, any returned id is valid for retrieving
+            the content.
+ """
+ return [storage.add(content, obj_id, check_presence)
+ for storage in self.storages].pop()
+
+ def restore(self, content, obj_id=None):
+        """ Restore a content that has been corrupted.
+
+        This function is identical to add() but does not check whether
+        the object id is already present in the storage.
+
+ (see "add" method)
+
+ Args:
+ content: content of the object to be added to the storage
+ obj_id: checksums of `bytes` as computed by ID_HASH_ALGO. When
+ given, obj_id will be trusted to match bytes. If missing,
+ obj_id will be computed on the fly.
+
+ Returns:
+            an id of the object in the storage. As the write storages are
+            always readable as well, any returned id is valid for retrieving
+            the content.
+ """
+ return [storage.restore(content, obj_id)
+ for storage in self.storages].pop()
+
+ def get(self, obj_id):
+ """ Retrieve the content of a given object.
+
+ Args:
+ obj_id: object id.
+
+ Returns:
+ the content of the requested object as bytes.
+
+ Raises:
+ ObjNotFoundError: if the requested object is missing.
+ """
+ for storage in self.storages:
+ try:
+ return storage.get(obj_id)
+ except ObjNotFoundError:
+ continue
+ # If no storage contains this content, raise the error
+ raise ObjNotFoundError(obj_id)
+
+ def check(self, obj_id):
+ """ Perform an integrity check for a given object.
+
+        Verify that the file object is in place and that the gzipped content
+ matches the object id.
+
+ Args:
+ obj_id: object id.
+
+ Raises:
+ ObjNotFoundError: if the requested object is missing.
+ Error: if the request object is corrupted.
+ """
+ nb_present = 0
+ for storage in self.storages:
+ try:
+ storage.check(obj_id)
+ except ObjNotFoundError:
+ continue
+ else:
+ nb_present += 1
+ # If there is an Error because of a corrupted file, then let it pass.
+
+        # Raise ObjNotFoundError only if the content couldn't be found in
+        # any of the storages.
+ if nb_present == 0:
+ raise ObjNotFoundError(obj_id)
+
+ def get_random(self, batch_size):
+ """ Get random ids of existing contents
+
+ This method is used in order to get random ids to perform
+ content integrity verifications on random contents.
+
+        Args:
+            batch_size (int): number of ids that will be returned.
+
+        Returns:
+            An iterable of ids of contents that are in the current object
+            storage.
+ """
+ storages_set = [storage for storage in self.storages
+ if len(storage) > 0]
+ if len(storages_set) <= 0:
+ return []
+
+ while storages_set:
+ storage = random.choice(storages_set)
+ try:
+ return storage.get_random(batch_size)
+ except NotImplementedError:
+ storages_set.remove(storage)
+        # No storage allows the get_random operation.
+        raise NotImplementedError(
+            "There is no storage implementation in the multiplexer that "
+            "supports the 'get_random' operation"
+ )
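
A sketch of the composition this diff is aiming for, mirroring the test setup further down (directories and slicing specs are illustrative; it assumes, as documented above, that add() returns an object id): one read-only legacy storage and one writable storage behind a single multiplexer.

    import tempfile

    from swh.storage.objstorage import PathSlicingObjStorage
    from swh.storage.objstorage.multiplexer import MultiplexerObjStorage
    from swh.storage.objstorage.multiplexer.filter import add_filter, read_only

    legacy = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
    current = PathSlicingObjStorage(tempfile.mkdtemp(), '0:1/0:5')

    storage = MultiplexerObjStorage([add_filter(legacy, read_only()),
                                     current])

    obj_id = storage.add(b'new content')   # written to `current` only
    content = storage.get(obj_id)          # reads try each storage in turn
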
diff --git a/swh/storage/tests/test_multiplexer_filter.py b/swh/storage/tests/test_multiplexer_filter.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/test_multiplexer_filter.py
@@ -0,0 +1,373 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import unittest
+
+from string import ascii_lowercase
+
+from nose.tools import istest
+from nose.plugins.attrib import attr
+
+from swh.core import hashutil
+from swh.storage.exc import ObjNotFoundError, Error
+from swh.storage.objstorage import ObjStorage
+from swh.storage.objstorage.multiplexer.filter import (add_filter, read_only,
+ id_prefix, id_regex)
+
+
+def get_random_content():
+ return bytes(''.join(random.sample(ascii_lowercase, 10)), 'utf8')
+
+
+class MockObjStorage(ObjStorage):
+ """ Mock an object storage for testing the filters.
+ """
+ def __init__(self):
+ self.objects = {}
+
+ def __contains__(self, obj_id):
+ return obj_id in self.objects
+
+ def __len__(self):
+ return len(self.objects)
+
+ def __iter__(self):
+ return iter(self.objects)
+
+ def id(self, content):
+        # The id is the sha1 of the content, which makes it easy to craft
+        # contents whose ids are (or are not) accepted by an id filter.
+ return hashutil.hashdata(content)['sha1']
+
+ def add(self, content, obj_id=None, check_presence=True):
+ if obj_id is None:
+ obj_id = self.id(content)
+
+ if check_presence and obj_id in self.objects:
+ return obj_id
+
+ self.objects[obj_id] = content
+ return obj_id
+
+ def restore(self, content, obj_id=None):
+ return self.add(content, obj_id, check_presence=False)
+
+ def get(self, obj_id):
+ if obj_id not in self:
+ raise ObjNotFoundError(obj_id)
+ return self.objects[obj_id]
+
+ def check(self, obj_id):
+ if obj_id not in self:
+ raise ObjNotFoundError(obj_id)
+ if obj_id != self.id(self.objects[obj_id]):
+ raise Error(obj_id)
+
+ def get_random(self, batch_size):
+ batch_size = min(len(self), batch_size)
+ return random.sample(list(self.objects), batch_size)
+
+
+@attr('!db')
+class MixinTestReadFilter(unittest.TestCase):
+ # Read only filter should not allow writing
+
+ def setUp(self):
+ super().setUp()
+ storage = MockObjStorage()
+
+ self.valid_content = b'pre-existing content'
+ self.invalid_content = b'invalid_content'
+ self.true_invalid_content = b'Anything that is not correct'
+ self.absent_content = b'non-existent content'
+ # Create a valid content.
+ self.valid_id = storage.add(self.valid_content)
+ # Create an invalid id and add a content with it.
+ self.invalid_id = storage.id(self.true_invalid_content)
+ storage.add(self.invalid_content, obj_id=self.invalid_id)
+ # Compute an id for a non-existing content.
+ self.absent_id = storage.id(self.absent_content)
+
+ self.storage = add_filter(storage, read_only())
+
+ @istest
+ def can_contains(self):
+ self.assertTrue(self.valid_id in self.storage)
+ self.assertTrue(self.invalid_id in self.storage)
+ self.assertFalse(self.absent_id in self.storage)
+
+ @istest
+ def can_iter(self):
+ self.assertIn(self.valid_id, iter(self.storage))
+ self.assertIn(self.invalid_id, iter(self.storage))
+
+ @istest
+ def can_len(self):
+ self.assertEqual(2, len(self.storage))
+
+ @istest
+ def can_get(self):
+ self.assertEqual(self.valid_content, self.storage.get(self.valid_id))
+ self.assertEqual(self.invalid_content,
+ self.storage.get(self.invalid_id))
+
+ @istest
+ def can_check(self):
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.absent_id)
+ with self.assertRaises(Error):
+ self.storage.check(self.invalid_id)
+ self.storage.check(self.valid_id)
+
+ @istest
+ def can_get_random(self):
+ self.assertEqual(1, len(self.storage.get_random(1)))
+ self.assertEqual(len(self.storage), len(self.storage.get_random(1000)))
+
+ @istest
+ def cannot_add(self):
+ new_id = self.storage.add(b'New content')
+ result = self.storage.add(self.valid_content, self.valid_id)
+ self.assertNotIn(new_id, self.storage)
+ self.assertIsNone(result)
+
+ @istest
+ def cannot_restore(self):
+ new_id = self.storage.restore(b'New content')
+ result = self.storage.restore(self.valid_content, self.valid_id)
+ self.assertNotIn(new_id, self.storage)
+ self.assertIsNone(result)
+
+
+class MixinTestIdFilter():
+    """ Mixin class that tests the filters based on IdObjStorageFilter.
+
+    The "filter_storage" method must be implemented by subclasses, which
+    may also override "ensure_valid" and "ensure_invalid".
+    """
+
+ def setUp(self):
+ super().setUp()
+        # Use a hack here: as the mock derives the id directly from the
+        # content, it is easy to create contents that are filtered or not.
+ self.prefix = '71'
+ storage = MockObjStorage()
+
+ # Make the storage filtered
+ self.base_storage = storage
+ self.storage = self.filter_storage(storage)
+
+ # Present content with valid id
+ self.present_valid_content = self.ensure_valid(b'yroqdtotji')
+ self.present_valid_id = storage.id(self.present_valid_content)
+
+ # Present content with invalid id
+ self.present_invalid_content = self.ensure_invalid(b'glxddlmmzb')
+ self.present_invalid_id = storage.id(self.present_invalid_content)
+
+ # Missing content with valid id
+ self.missing_valid_content = self.ensure_valid(b'rmzkdclkez')
+ self.missing_valid_id = storage.id(self.missing_valid_content)
+
+ # Missing content with invalid id
+ self.missing_invalid_content = self.ensure_invalid(b'hlejfuginh')
+ self.missing_invalid_id = storage.id(self.missing_invalid_content)
+
+ # Present corrupted content with valid id
+ self.present_corrupted_valid_content = self.ensure_valid(b'cdsjwnpaij')
+ self.true_present_corrupted_valid_content = self.ensure_valid(
+ b'mgsdpawcrr')
+ self.present_corrupted_valid_id = storage.id(
+ self.true_present_corrupted_valid_content)
+
+ # Present corrupted content with invalid id
+ self.present_corrupted_invalid_content = self.ensure_invalid(
+ b'pspjljnrco')
+ self.true_present_corrupted_invalid_content = self.ensure_invalid(
+ b'rjocbnnbso')
+ self.present_corrupted_invalid_id = storage.id(
+ self.true_present_corrupted_invalid_content)
+
+ # Missing (potentially) corrupted content with valid id
+ self.missing_corrupted_valid_content = self.ensure_valid(
+ b'zxkokfgtou')
+ self.true_missing_corrupted_valid_content = self.ensure_valid(
+ b'royoncooqa')
+ self.missing_corrupted_valid_id = storage.id(
+ self.true_missing_corrupted_valid_content)
+
+ # Missing (potentially) corrupted content with invalid id
+ self.missing_corrupted_invalid_content = self.ensure_invalid(
+ b'hxaxnrmnyk')
+ self.true_missing_corrupted_invalid_content = self.ensure_invalid(
+ b'qhbolyuifr')
+ self.missing_corrupted_invalid_id = storage.id(
+ self.true_missing_corrupted_invalid_content)
+
+ # Add the content that are supposed to be present
+ storage.add(self.present_valid_content)
+ storage.add(self.present_invalid_content)
+ storage.add(self.present_corrupted_valid_content,
+ obj_id=self.present_corrupted_valid_id)
+ storage.add(self.present_corrupted_invalid_content,
+ obj_id=self.present_corrupted_invalid_id)
+
+ def filter_storage(self, storage):
+ raise NotImplementedError(
+ 'Id_filter test class must have a filter_storage method')
+
+ def ensure_valid(self, content=None):
+ if content is None:
+ content = get_random_content()
+ while not self.storage.is_valid(self.base_storage.id(content)):
+ content = get_random_content()
+ return content
+
+ def ensure_invalid(self, content=None):
+ if content is None:
+ content = get_random_content()
+ while self.storage.is_valid(self.base_storage.id(content)):
+ content = get_random_content()
+ return content
+
+ @istest
+ def contains(self):
+ # Both contents are present, but the invalid one should be ignored.
+ self.assertTrue(self.present_valid_id in self.storage)
+ self.assertFalse(self.present_invalid_id in self.storage)
+ self.assertFalse(self.missing_valid_id in self.storage)
+ self.assertFalse(self.missing_invalid_id in self.storage)
+ self.assertTrue(self.present_corrupted_valid_id in self.storage)
+ self.assertFalse(self.present_corrupted_invalid_id in self.storage)
+ self.assertFalse(self.missing_corrupted_valid_id in self.storage)
+ self.assertFalse(self.missing_corrupted_invalid_id in self.storage)
+
+ @istest
+ def iter(self):
+ self.assertIn(self.present_valid_id, iter(self.storage))
+ self.assertNotIn(self.present_invalid_id, iter(self.storage))
+ self.assertNotIn(self.missing_valid_id, iter(self.storage))
+ self.assertNotIn(self.missing_invalid_id, iter(self.storage))
+ self.assertIn(self.present_corrupted_valid_id, iter(self.storage))
+ self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage))
+ self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage))
+ self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage))
+
+ @istest
+ def len(self):
+ # Four contents are present, but only two should be valid.
+ self.assertEqual(2, len(self.storage))
+
+ @istest
+ def get(self):
+ self.assertEqual(self.present_valid_content,
+ self.storage.get(self.present_valid_id))
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.present_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_invalid_id)
+ self.assertEqual(self.present_corrupted_valid_content,
+ self.storage.get(self.present_corrupted_valid_id))
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.present_corrupted_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_corrupted_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.get(self.missing_corrupted_invalid_id)
+
+ @istest
+ def check(self):
+ self.storage.check(self.present_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.present_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_invalid_id)
+ with self.assertRaises(Error):
+ self.storage.check(self.present_corrupted_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.present_corrupted_invalid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_corrupted_valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(self.missing_corrupted_invalid_id)
+
+ @istest
+ def get_random(self):
+ self.assertEqual(0, len(list(self.storage.get_random(0))))
+
+ random_content = list(self.storage.get_random(1000))
+ self.assertIn(self.present_valid_id, random_content)
+ self.assertNotIn(self.present_invalid_id, random_content)
+ self.assertNotIn(self.missing_valid_id, random_content)
+ self.assertNotIn(self.missing_invalid_id, random_content)
+ self.assertIn(self.present_corrupted_valid_id, random_content)
+ self.assertNotIn(self.present_corrupted_invalid_id, random_content)
+ self.assertNotIn(self.missing_corrupted_valid_id, random_content)
+ self.assertNotIn(self.missing_corrupted_invalid_id, random_content)
+
+ @istest
+ def add(self):
+ # Add valid and invalid contents to the storage and check their
+ # presence with the unfiltered storage.
+ valid_content = self.ensure_valid(b'ulepsrjbgt')
+ valid_id = self.base_storage.id(valid_content)
+ invalid_content = self.ensure_invalid(b'znvghkjked')
+ invalid_id = self.base_storage.id(invalid_content)
+ self.storage.add(valid_content)
+ self.storage.add(invalid_content)
+ self.assertTrue(valid_id in self.base_storage)
+ self.assertFalse(invalid_id in self.base_storage)
+
+ @istest
+ def restore(self):
+        # Add corrupted content to the storage and then try to restore it
+ valid_content = self.ensure_valid(b'ulepsrjbgt')
+ valid_id = self.base_storage.id(valid_content)
+ corrupted_content = self.ensure_valid(b'ltjkjsloyb')
+ corrupted_id = self.base_storage.id(corrupted_content)
+ self.storage.add(corrupted_content, obj_id=valid_id)
+ with self.assertRaises(ObjNotFoundError):
+ self.storage.check(corrupted_id)
+ with self.assertRaises(Error):
+ self.storage.check(valid_id)
+ self.storage.restore(valid_content)
+ self.storage.check(valid_id)
+
+
+@attr('!db')
+class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase):
+ def setUp(self):
+        self.prefix = '71'
+ super().setUp()
+
+ def ensure_valid(self, content):
+ obj_id = hashutil.hashdata(content)['sha1']
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ self.assertTrue(hex_obj_id.startswith(self.prefix))
+ return content
+
+ def ensure_invalid(self, content):
+ obj_id = hashutil.hashdata(content)['sha1']
+ hex_obj_id = hashutil.hash_to_hex(obj_id)
+ self.assertFalse(hex_obj_id.startswith(self.prefix))
+ return content
+
+ def filter_storage(self, storage):
+ return add_filter(storage, id_prefix(self.prefix))
+
+
+@attr('!db')
+class TestRegexFilter(MixinTestIdFilter, unittest.TestCase):
+ def setUp(self):
+ self.regex = r'[a-f][0-9].*'
+ super().setUp()
+
+ def filter_storage(self, storage):
+ return add_filter(storage, id_regex(self.regex))
diff --git a/swh/storage/tests/test_objstorage_multiplexer.py b/swh/storage/tests/test_objstorage_multiplexer.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/test_objstorage_multiplexer.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import tempfile
+import unittest
+
+from nose.tools import istest
+
+from swh.storage.objstorage import PathSlicingObjStorage
+from swh.storage.objstorage.multiplexer import MultiplexerObjStorage
+from swh.storage.objstorage.multiplexer.filter import add_filter, read_only
+
+from objstorage_testing import ObjStorageTestFixture
+
+
+class TestMultiplexerObjStorage(ObjStorageTestFixture, unittest.TestCase):
+
+ def setUp(self):
+ super().setUp()
+ self.storage_v1 = PathSlicingObjStorage(tempfile.mkdtemp(), '0:2/2:4')
+ self.storage_v2 = PathSlicingObjStorage(tempfile.mkdtemp(), '0:1/0:5')
+
+ self.r_storage = add_filter(self.storage_v1, read_only())
+ self.w_storage = self.storage_v2
+ self.storage = MultiplexerObjStorage([self.r_storage, self.w_storage])
+
+ @istest
+ def contains(self):
+ content_p, obj_id_p = self.hash_content(b'contains_present')
+ content_m, obj_id_m = self.hash_content(b'contains_missing')
+ self.storage.add(content_p, obj_id=obj_id_p)
+ self.assertIn(obj_id_p, self.storage)
+ self.assertNotIn(obj_id_m, self.storage)
+
+ @istest
+ def iter(self):
+ content, obj_id = self.hash_content(b'iter')
+ self.assertEqual(list(iter(self.storage)), [])
+ self.storage.add(content, obj_id=obj_id)
+ self.assertEqual(list(iter(self.storage)), [obj_id])
+
+ @istest
+ def len(self):
+ content, obj_id = self.hash_content(b'len')
+ self.assertEqual(len(self.storage), 0)
+ self.storage.add(content, obj_id=obj_id)
+ self.assertEqual(len(self.storage), 1)
+
+ @istest
+ def len_multiple(self):
+ content, obj_id = self.hash_content(b'len_multiple')
+ # Add a content to the read-only storage
+ self.storage_v1.add(content)
+ self.assertEqual(len(self.storage), 1)
+        # By adding the same content to the global storage, it should be
+        # replicated: len() indicates the number of files, not the number
+        # of unique contents.
+ self.storage.add(content)
+ self.assertEqual(len(self.storage), 2)
+
+ @istest
+ def get_random_contents(self):
+ content, obj_id = self.hash_content(b'get_random_content')
+ self.storage.add(content)
+ random_contents = list(self.storage.get_random(1))
+ self.assertEqual(1, len(random_contents))
+ self.assertIn(obj_id, random_contents)
+
+ @istest
+ def access_readonly(self):
+ # Add a content to the readonly storage
+ content, obj_id = self.hash_content(b'content in read-only')
+ self.storage_v1.add(content)
+ # Try to retrieve it on the main storage
+ self.assertIn(obj_id, self.storage)