diff --git a/swh/storage/objstorage/multiplexer/filter/__init__.py b/swh/storage/objstorage/multiplexer/filter/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/swh/storage/objstorage/multiplexer/filter/filter.py b/swh/storage/objstorage/multiplexer/filter/filter.py
new file mode 100644
index 00000000..fa4cc06b
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/filter.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from ...objstorage import ObjStorage
+
+
+class ObjStorageFilter(ObjStorage):
+    """ Base implementation of a filter that allows inputs on ObjStorage or not
+
+    This class copies the API of ...objstorage in order to filter the inputs
+    of this class.
+    If the operation is allowed, return the result of this operation
+    applied to the destination implementation. Otherwise, just return
+    without any operation.
+
+    This class is an abstract base class for a classic read/write storage.
+    Filters can inherit from it and only redefine some methods in order
+    to change behavior.
+    """
+
+    def __init__(self, storage):
+        self.storage = storage
+
+    def __contains__(self, *args, **kwargs):
+        return self.storage.__contains__(*args, **kwargs)
+
+    def __iter__(self):
+        return self.storage.__iter__()
+
+    def __len__(self):
+        return self.storage.__len__()
+
+    def add(self, *args, **kwargs):
+        return self.storage.add(*args, **kwargs)
+
+    def restore(self, *args, **kwargs):
+        return self.storage.restore(*args, **kwargs)
+
+    def get(self, *args, **kwargs):
+        return self.storage.get(*args, **kwargs)
+
+    def check(self, *args, **kwargs):
+        return self.storage.check(*args, **kwargs)
+
+    def get_random(self, *args, **kwargs):
+        return self.storage.get_random(*args, **kwargs)
diff --git a/swh/storage/objstorage/multiplexer/filter/id_filter.py b/swh/storage/objstorage/multiplexer/filter/id_filter.py
new file mode 100644
index 00000000..b7faa30a
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/id_filter.py
@@ -0,0 +1,121 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+
+from swh.core import hashutil
+
+from .filter import ObjStorageFilter
+from ...objstorage import ID_HASH_ALGO
+from ....exc import ObjNotFoundError
+
+
+def compute_hash(bytes):
+    """ Compute the hash of the given content.
+
+    Args:
+        bytes: raw content of the object to hash.
+
+    Returns:
+        The digest of the content, computed with the ID_HASH_ALGO algorithm.
+    """
+    # Checksum is missing, compute it on the fly.
+    h = hashutil._new_hash(ID_HASH_ALGO, len(bytes))
+    h.update(bytes)
+    return h.digest()
+
+
+class IdObjStorageFilter(ObjStorageFilter):
+    """ Filter that only allows operations if the object id matches a
+    requirement.
+
+    Even for read operations, check first whether the id matches the
+    requirement. This may prevent unnecessary disk accesses.
+
+    Subclasses must implement `is_valid`.
+    """
+
+    def is_valid(self, obj_id):
+        """ Indicates if the given id is valid.
+        """
+        raise NotImplementedError('Implementations of an IdObjStorageFilter '
+                                  'must have a "is_valid" method')
+
+    def __contains__(self, obj_id, *args, **kwargs):
+        # An id rejected by the filter is reported as absent without
+        # querying the wrapped storage.
+        if self.is_valid(obj_id):
+            return self.storage.__contains__(*args, obj_id=obj_id, **kwargs)
+        return False
+
+    def __len__(self):
+        # Only the ids accepted by the filter are counted.
+        return sum(1 for obj_id in self.storage if self.is_valid(obj_id))
+
+    def __iter__(self):
+        yield from filter(self.is_valid, iter(self.storage))
+
+    def add(self, content, obj_id=None, check_presence=True, *args, **kwargs):
+        # The id is needed to take the filtering decision, so compute it
+        # when the caller did not provide it.
+        if obj_id is None:
+            obj_id = compute_hash(content)
+        if self.is_valid(obj_id):
+            # check_presence must be forwarded explicitly: it is captured by
+            # the signature and thus is not part of *args / **kwargs.
+            return self.storage.add(content, *args, obj_id=obj_id,
+                                    check_presence=check_presence, **kwargs)
+
+    def restore(self, content, obj_id=None, *args, **kwargs):
+        if obj_id is None:
+            obj_id = compute_hash(content)
+        if self.is_valid(obj_id):
+            return self.storage.restore(content, *args,
+                                        obj_id=obj_id, **kwargs)
+
+    def get(self, obj_id, *args, **kwargs):
+        # A filtered-out id is handled as an object missing from the storage.
+        if self.is_valid(obj_id):
+            return self.storage.get(*args, obj_id=obj_id, **kwargs)
+        raise ObjNotFoundError(obj_id)
+
+    def check(self, obj_id, *args, **kwargs):
+        if self.is_valid(obj_id):
+            return self.storage.check(*args, obj_id=obj_id, **kwargs)
+        raise ObjNotFoundError(obj_id)
+
+    def get_random(self, *args, **kwargs):
+        # Ids rejected by the filter are dropped, so the result may be shorter
+        # than what the wrapped storage returned.
+        yield from filter(self.is_valid,
+                          self.storage.get_random(*args, **kwargs))
+
+
+class RegexIdObjStorageFilter(IdObjStorageFilter):
+    """ Filter that allows operations if the content's id as hex matches a
+    regex.
+    """
+
+    def __init__(self, storage, regex):
+        super().__init__(storage)
+        self.regex = re.compile(regex)
+
+    def is_valid(self, obj_id):
+        hex_obj_id = hashutil.hash_to_hex(obj_id)
+        # re.match only anchors at the beginning of the hex id.
+        return self.regex.match(hex_obj_id) is not None
+
+
+class PrefixIdObjStorageFilter(IdObjStorageFilter):
+    """ Filter that allows operations if the hexlified id has a given prefix.
+    """
+
+    def __init__(self, storage, prefix):
+        super().__init__(storage)
+        self.prefix = str(prefix)
+
+    def is_valid(self, obj_id):
+        hex_obj_id = hashutil.hash_to_hex(obj_id)
+        return str(hex_obj_id).startswith(self.prefix)
diff --git a/swh/storage/objstorage/multiplexer/filter/read_write_filter.py b/swh/storage/objstorage/multiplexer/filter/read_write_filter.py
new file mode 100644
index 00000000..e4821b9c
--- /dev/null
+++ b/swh/storage/objstorage/multiplexer/filter/read_write_filter.py
@@ -0,0 +1,20 @@
+# Copyright (C) 2015-2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from .filter import ObjStorageFilter
+
+
+class ReadObjStorageFilter(ObjStorageFilter):
+    """ Filter that disables the write operations of the storage.
+
+    `add` and `restore` silently do nothing and return None; the other
+    operations are inherited from ObjStorageFilter.
+    """
+
+    def add(self, *args, **kwargs):
+        return
+
+    def restore(self, *args, **kwargs):
+        return