Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_multiplexer_filter.py
- This file was added.
# Copyright (C) 2015-2016 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import random | |||||
import unittest | |||||
from nose.tools import istest | |||||
from nose.plugins.attrib import attr | |||||
from swh.core import hashutil | |||||
from swh.storage.exc import ObjNotFoundError, Error | |||||
from swh.storage.objstorage import ObjStorage | |||||
from swh.storage.objstorage.multiplexer.filter import (add_filter, read_only, | |||||
id_prefix) | |||||
class MockObjStorage(ObjStorage): | |||||
""" Mock an object storage for testing the filters. | |||||
""" | |||||
def __init__(self): | |||||
self.objects = {} | |||||
def __contains__(self, obj_id): | |||||
return obj_id in self.objects | |||||
def __len__(self): | |||||
return len(self.objects) | |||||
def __iter__(self): | |||||
return iter(self.objects) | |||||
def id(self, content): | |||||
# Id is the content itself for easily choose the id of | |||||
# a content for filtering. | |||||
return hashutil.hashdata(content)['sha1'] | |||||
return content | |||||
def add(self, content, obj_id=None, check_presence=True): | |||||
if obj_id is None: | |||||
obj_id = self.id(content) | |||||
if check_presence and obj_id in self.objects: | |||||
return obj_id | |||||
self.objects[obj_id] = content | |||||
return obj_id | |||||
def restore(self, content, obj_id=None): | |||||
return self.add(content, obj_id, check_presence=False) | |||||
def get(self, obj_id): | |||||
if obj_id not in self: | |||||
raise ObjNotFoundError(obj_id) | |||||
return self.objects[obj_id] | |||||
def check(self, obj_id): | |||||
if obj_id not in self: | |||||
raise ObjNotFoundError(obj_id) | |||||
if obj_id != self.id(self.objects[obj_id]): | |||||
raise Error(obj_id) | |||||
def get_random(self, batch_size): | |||||
batch_size = min(len(self), batch_size) | |||||
return random.sample(list(self.objects), batch_size) | |||||
@attr('!db') | |||||
class MixinTestReadFilter(unittest.TestCase): | |||||
# Read only filter should not allow writing | |||||
def setUp(self): | |||||
super().setUp() | |||||
storage = MockObjStorage() | |||||
self.valid_content = b'pre-existing content' | |||||
self.invalid_content = b'invalid_content' | |||||
self.true_invalid_content = b'Anything that is not correct' | |||||
self.absent_content = b'non-existent content' | |||||
# Create a valid content. | |||||
self.valid_id = storage.add(self.valid_content) | |||||
# Create an invalid id and add a content with it. | |||||
self.invalid_id = storage.id(self.true_invalid_content) | |||||
storage.add(self.invalid_content, obj_id=self.invalid_id) | |||||
# Compute an id for a non-existing content. | |||||
self.absent_id = storage.id(self.absent_content) | |||||
self.storage = add_filter(storage, read_only()) | |||||
@istest | |||||
def can_contains(self): | |||||
self.assertTrue(self.valid_id in self.storage) | |||||
self.assertTrue(self.invalid_id in self.storage) | |||||
self.assertFalse(self.absent_id in self.storage) | |||||
@istest | |||||
def can_iter(self): | |||||
self.assertIn(self.valid_id, iter(self.storage)) | |||||
self.assertIn(self.invalid_id, iter(self.storage)) | |||||
@istest | |||||
def can_len(self): | |||||
self.assertEqual(2, len(self.storage)) | |||||
@istest | |||||
def can_get(self): | |||||
self.assertEqual(self.valid_content, self.storage.get(self.valid_id)) | |||||
self.assertEqual(self.invalid_content, | |||||
self.storage.get(self.invalid_id)) | |||||
@istest | |||||
def can_check(self): | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.absent_id) | |||||
with self.assertRaises(Error): | |||||
self.storage.check(self.invalid_id) | |||||
self.storage.check(self.valid_id) | |||||
@istest | |||||
def can_get_random(self): | |||||
self.assertEqual(1, len(self.storage.get_random(1))) | |||||
self.assertEqual(len(self.storage), len(self.storage.get_random(1000))) | |||||
@istest | |||||
def cannot_add(self): | |||||
new_id = self.storage.add(b'New content') | |||||
result = self.storage.add(self.valid_content, self.valid_id) | |||||
self.assertNotIn(new_id, self.storage) | |||||
self.assertIsNone(result) | |||||
@istest | |||||
def cannot_restore(self): | |||||
new_id = self.storage.restore(b'New content') | |||||
result = self.storage.restore(self.valid_content, self.valid_id) | |||||
self.assertNotIn(new_id, self.storage) | |||||
self.assertIsNone(result) | |||||
class MixinTestIdFilter(): | |||||
""" Mixin class that tests the filters based on filter.IdFilter | |||||
Methods "make_valid", "make_invalid" and "filter_storage" must be | |||||
implemented by subclasses. | |||||
""" | |||||
def setUp(self): | |||||
super().setUp() | |||||
# Use a hack here : as the mock uses the content as id, it is easy to | |||||
# create contents that are filtered or not. | |||||
self.prefix = '71' | |||||
storage = MockObjStorage() | |||||
# Present content with valid id | |||||
self.present_valid_content = self.ensure_valid(b'yroqdtotji') | |||||
self.present_valid_id = storage.id(self.present_valid_content) | |||||
# Present content with invalid id | |||||
self.present_invalid_content = self.ensure_invalid(b'glxddlmmzb') | |||||
self.present_invalid_id = storage.id(self.present_invalid_content) | |||||
# Missing content with valid id | |||||
self.missing_valid_content = self.ensure_valid(b'rmzkdclkez') | |||||
self.missing_valid_id = storage.id(self.missing_valid_content) | |||||
# Missing content with invalid id | |||||
self.missing_invalid_content = self.ensure_invalid(b'hlejfuginh') | |||||
self.missing_invalid_id = storage.id(self.missing_invalid_content) | |||||
# Present corrupted content with valid id | |||||
self.present_corrupted_valid_content = self.ensure_valid(b'cdsjwnpaij') | |||||
self.true_present_corrupted_valid_content = self.ensure_valid( | |||||
b'mgsdpawcrr') | |||||
self.present_corrupted_valid_id = storage.id( | |||||
self.true_present_corrupted_valid_content) | |||||
# Present corrupted content with invalid id | |||||
self.present_corrupted_invalid_content = self.ensure_invalid( | |||||
b'pspjljnrco') | |||||
self.true_present_corrupted_invalid_content = self.ensure_invalid( | |||||
b'rjocbnnbso') | |||||
self.present_corrupted_invalid_id = storage.id( | |||||
self.true_present_corrupted_invalid_content) | |||||
# Missing (potentially) corrupted content with valid id | |||||
self.missing_corrupted_valid_content = self.ensure_valid( | |||||
b'zxkokfgtou') | |||||
self.true_missing_corrupted_valid_content = self.ensure_valid( | |||||
b'royoncooqa') | |||||
self.missing_corrupted_valid_id = storage.id( | |||||
self.true_missing_corrupted_valid_content) | |||||
# Missing (potentially) corrupted content with invalid id | |||||
self.missing_corrupted_invalid_content = self.ensure_invalid( | |||||
b'hxaxnrmnyk') | |||||
self.true_missing_corrupted_invalid_content = self.ensure_invalid( | |||||
b'qhbolyuifr') | |||||
self.missing_corrupted_invalid_id = storage.id( | |||||
self.true_missing_corrupted_invalid_content) | |||||
# Add the content that are supposed to be present | |||||
storage.add(self.present_valid_content) | |||||
storage.add(self.present_invalid_content) | |||||
storage.add(self.present_corrupted_valid_content, | |||||
obj_id=self.present_corrupted_valid_id) | |||||
storage.add(self.present_corrupted_invalid_content, | |||||
obj_id=self.present_corrupted_invalid_id) | |||||
# Make the storage filtered | |||||
self.base_storage = storage | |||||
self.storage = self.filter_storage(storage) | |||||
def filter_storage(self, storage): | |||||
raise NotImplementedError( | |||||
'Id_filter test class must have a filter_storage method') | |||||
def ensure_valid(self, content): | |||||
raise NotImplementedError( | |||||
'Id_filter test class must have a make_valid method') | |||||
def ensure_invalid(self, content): | |||||
raise NotImplementedError( | |||||
'Id_filter test class must have a make_invalid method') | |||||
@istest | |||||
def contains(self): | |||||
# Both contents are present, but the invalid one should be ignored. | |||||
self.assertTrue(self.present_valid_id in self.storage) | |||||
self.assertFalse(self.present_invalid_id in self.storage) | |||||
self.assertFalse(self.missing_valid_id in self.storage) | |||||
self.assertFalse(self.missing_invalid_id in self.storage) | |||||
self.assertTrue(self.present_corrupted_valid_id in self.storage) | |||||
self.assertFalse(self.present_corrupted_invalid_id in self.storage) | |||||
self.assertFalse(self.missing_corrupted_valid_id in self.storage) | |||||
self.assertFalse(self.missing_corrupted_invalid_id in self.storage) | |||||
@istest | |||||
def iter(self): | |||||
self.assertIn(self.present_valid_id, iter(self.storage)) | |||||
self.assertNotIn(self.present_invalid_id, iter(self.storage)) | |||||
self.assertNotIn(self.missing_valid_id, iter(self.storage)) | |||||
self.assertNotIn(self.missing_invalid_id, iter(self.storage)) | |||||
self.assertIn(self.present_corrupted_valid_id, iter(self.storage)) | |||||
self.assertNotIn(self.present_corrupted_invalid_id, iter(self.storage)) | |||||
self.assertNotIn(self.missing_corrupted_valid_id, iter(self.storage)) | |||||
self.assertNotIn(self.missing_corrupted_invalid_id, iter(self.storage)) | |||||
@istest | |||||
def len(self): | |||||
# Four contents are present, but only two should be valid. | |||||
self.assertEqual(2, len(self.storage)) | |||||
@istest | |||||
def get(self): | |||||
self.assertEqual(self.present_valid_content, | |||||
self.storage.get(self.present_valid_id)) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.get(self.present_invalid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.get(self.missing_valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.get(self.missing_invalid_id) | |||||
self.assertEqual(self.present_corrupted_valid_content, | |||||
self.storage.get(self.present_corrupted_valid_id)) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.get(self.present_corrupted_invalid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.get(self.missing_corrupted_valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.get(self.missing_corrupted_invalid_id) | |||||
@istest | |||||
def check(self): | |||||
self.storage.check(self.present_valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.present_invalid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.missing_valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.missing_invalid_id) | |||||
with self.assertRaises(Error): | |||||
self.storage.check(self.present_corrupted_valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.present_corrupted_invalid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.missing_corrupted_valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(self.missing_corrupted_invalid_id) | |||||
@istest | |||||
def get_random(self): | |||||
self.assertEqual(0, len(list(self.storage.get_random(0)))) | |||||
random_content = list(self.storage.get_random(1000)) | |||||
self.assertIn(self.present_valid_id, random_content) | |||||
self.assertNotIn(self.present_invalid_id, random_content) | |||||
self.assertNotIn(self.missing_valid_id, random_content) | |||||
self.assertNotIn(self.missing_invalid_id, random_content) | |||||
self.assertIn(self.present_corrupted_valid_id, random_content) | |||||
self.assertNotIn(self.present_corrupted_invalid_id, random_content) | |||||
self.assertNotIn(self.missing_corrupted_valid_id, random_content) | |||||
self.assertNotIn(self.missing_corrupted_invalid_id, random_content) | |||||
@istest | |||||
def add(self): | |||||
# Add valid and invalid contents to the storage and check their | |||||
# presence with the unfiltered storage. | |||||
valid_content = self.ensure_valid(b'ulepsrjbgt') | |||||
valid_id = self.base_storage.id(valid_content) | |||||
invalid_content = self.ensure_invalid(b'znvghkjked') | |||||
invalid_id = self.base_storage.id(invalid_content) | |||||
self.storage.add(valid_content) | |||||
self.storage.add(invalid_content) | |||||
self.assertTrue(valid_id in self.base_storage) | |||||
self.assertFalse(invalid_id in self.base_storage) | |||||
@istest | |||||
def restore(self): | |||||
# Add corrupted content to the storage and the try to restore it | |||||
valid_content = self.ensure_valid(b'ulepsrjbgt') | |||||
valid_id = self.base_storage.id(valid_content) | |||||
corrupted_content = self.ensure_valid(b'ltjkjsloyb') | |||||
corrupted_id = self.base_storage.id(corrupted_content) | |||||
self.storage.add(corrupted_content, obj_id=valid_id) | |||||
with self.assertRaises(ObjNotFoundError): | |||||
self.storage.check(corrupted_id) | |||||
with self.assertRaises(Error): | |||||
self.storage.check(valid_id) | |||||
self.storage.restore(valid_content) | |||||
self.storage.check(valid_id) | |||||
@attr('!db') | |||||
class TestPrefixFilter(MixinTestIdFilter, unittest.TestCase): | |||||
def setUp(self): | |||||
self.prefix = b'71' | |||||
super().setUp() | |||||
def ensure_valid(self, content): | |||||
obj_id = hashutil.hashdata(content)['sha1'] | |||||
hex_obj_id = hashutil.hash_to_hex(obj_id) | |||||
print("Ensure", content, hex_obj_id, "=", obj_id, "is valid") | |||||
ardumont: Beware the print ^^ | |||||
self.assertTrue(hex_obj_id.startswith(self.prefix)) | |||||
return content | |||||
def ensure_invalid(self, content): | |||||
obj_id = hashutil.hashdata(content)['sha1'] | |||||
hex_obj_id = hashutil.hash_to_hex(obj_id) | |||||
print("Ensure", content, hex_obj_id, "=", obj_id, "is invalid") | |||||
ardumontUnsubmitted Done Inline ActionsHere too. ardumont: Here too. | |||||
self.assertFalse(hex_obj_id.startswith(self.prefix)) | |||||
return content | |||||
def filter_storage(self, storage): | |||||
return add_filter(storage, id_prefix(self.prefix)) |
Beware the print ^^