Vault: back the bundle cache with a single configurable objstorage

* swh/vault/api/server.py: the default configuration now describes the
  cache as an objstorage ('cls' + 'args') instead of a bare root
  directory, and the vault database DSN key is renamed from 'vault_db'
  to 'db'.  'allow_delete' is enabled in the default cache arguments:
  the storages built by the removed VaultCache._get_storage() always
  enabled it, the test configuration enables it, and VaultCache.delete()
  is still part of the class.
* swh/vault/backend.py: pass the cache configuration to VaultCache as a
  single mapping and read the DSN from the renamed 'db' key.
* swh/vault/cache.py: VaultCache wraps one objstorage created with
  get_objstorage(**config).  Bundles of every type share that storage
  and are keyed by compute_hash() of '<obj_type>:<obj_id>' rather than
  being spread over one path-sliced storage per bundle type.  The
  per-type ls() method is removed along with _get_storage().
* swh/vault/tests/vault_testing.py: test configuration updated to the
  new 'db' / 'cache' layout.

diff --git a/swh/vault/api/server.py b/swh/vault/api/server.py
--- a/swh/vault/api/server.py
+++ b/swh/vault/api/server.py
@@ -21,13 +21,20 @@
         'args': {
             'db': 'dbname=softwareheritage-dev',
             'objstorage': {
-                'root': '/tmp/objects',
+                'root': '/srv/softwareheritage/objects',
                 'slicing': '0:2/2:4/4:6',
             },
         },
     }),
-    'cache': ('dict', {'root': '/tmp/vaultcache'}),
-    'vault_db': ('str', 'dbname=swh-vault')
+    'cache': ('dict', {
+        'cls': 'pathslicing',
+        'args': {
+            'root': '/srv/softwareheritage/vault',
+            'slicing': '0:1/1:5',
+            'allow_delete': True,
+        },
+    }),
+    'db': ('str', 'dbname=swh-vault')
 }
 
 
diff --git a/swh/vault/backend.py b/swh/vault/backend.py
--- a/swh/vault/backend.py
+++ b/swh/vault/backend.py
@@ -76,7 +76,7 @@
     """
     def __init__(self, config):
         self.config = config
-        self.cache = VaultCache(**self.config['cache'])
+        self.cache = VaultCache(self.config['cache'])
         self.db = None
         self.reconnect()
         self.smtp_server = smtplib.SMTP('localhost')
@@ -85,7 +85,7 @@
         """Reconnect to the database."""
         if not self.db or self.db.closed:
             self.db = psycopg2.connect(
-                dsn=self.config['vault_db'],
+                dsn=self.config['db'],
                 cursor_factory=psycopg2.extras.RealDictCursor,
             )
 
diff --git a/swh/vault/cache.py b/swh/vault/cache.py
--- a/swh/vault/cache.py
+++ b/swh/vault/cache.py
@@ -3,63 +3,46 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import os
-
 from swh.model import hashutil
 from swh.objstorage import get_objstorage
-from swh.objstorage.objstorage_pathslicing import DIR_MODE
-
+from swh.objstorage.objstorage import compute_hash
 
-class VaultCache():
-    """The vault cache is an object storage that stores bundles
 
-    The current implementation uses a PathSlicingObjStorage to store
-    the bundles. The id of a content if prefixed to specify its type
-    and store different types of bundle in different folders.
+class VaultCache:
+    """The Vault cache is an object storage that stores Vault bundles.
 
+    This implementation computes sha1('<obj_type>:<obj_id>') as the
+    internal identifiers used in the underlying objstorage.
     """
 
-    def __init__(self, root):
-        self.root = root
-        self.storages = {}
+    def __init__(self, objstorage):
+        self.objstorage = get_objstorage(**objstorage)
 
     def add(self, obj_type, obj_id, content):
-        storage = self._get_storage(obj_type)
-        return storage.add(content, obj_id)
+        sid = self._get_internal_id(obj_type, obj_id)
+        return self.objstorage.add(content, sid)
 
     def get(self, obj_type, obj_id):
-        storage = self._get_storage(obj_type)
-        return storage.get(hashutil.hash_to_bytes(obj_id))
+        sid = self._get_internal_id(obj_type, obj_id)
+        return self.objstorage.get(hashutil.hash_to_bytes(sid))
 
     def delete(self, obj_type, obj_id):
-        storage = self._get_storage(obj_type)
-        return storage.delete(hashutil.hash_to_bytes(obj_id))
+        sid = self._get_internal_id(obj_type, obj_id)
+        return self.objstorage.delete(hashutil.hash_to_bytes(sid))
 
     def add_stream(self, obj_type, obj_id, content_iter):
-        storage = self._get_storage(obj_type)
-        return storage.add_stream(content_iter, obj_id)
+        sid = self._get_internal_id(obj_type, obj_id)
+        return self.objstorage.add_stream(content_iter, sid)
 
     def get_stream(self, obj_type, obj_id):
-        storage = self._get_storage(obj_type)
-        return storage.get_stream(hashutil.hash_to_bytes(obj_id))
+        sid = self._get_internal_id(obj_type, obj_id)
+        return self.objstorage.get_stream(hashutil.hash_to_bytes(sid))
 
     def is_cached(self, obj_type, obj_id):
-        storage = self._get_storage(obj_type)
-        return hashutil.hash_to_bytes(obj_id) in storage
-
-    def ls(self, obj_type):
-        storage = self._get_storage(obj_type)
-        yield from storage
-
-    def _get_storage(self, obj_type):
-        """Get the storage that corresponds to the object type"""
-
-        if obj_type not in self.storages:
-            fp = os.path.join(self.root, obj_type)
-            if not os.path.isdir(fp):
-                os.makedirs(fp, DIR_MODE, exist_ok=True)
-
-            conf = {'root': fp, 'slicing': '0:1/0:5', 'allow_delete': True}
-            self.storages[obj_type] = get_objstorage('pathslicing', conf)
+        sid = self._get_internal_id(obj_type, obj_id)
+        return hashutil.hash_to_bytes(sid) in self.objstorage
 
-        return self.storages[obj_type]
+    def _get_internal_id(self, obj_type, obj_id):
+        """Hash '<obj_type>:<hex obj_id>' into the objstorage identifier."""
+        obj_id = hashutil.hash_to_hex(obj_id)
+        return compute_hash('{}:{}'.format(obj_type, obj_id).encode())
diff --git a/swh/vault/tests/vault_testing.py b/swh/vault/tests/vault_testing.py
--- a/swh/vault/tests/vault_testing.py
+++ b/swh/vault/tests/vault_testing.py
@@ -39,8 +39,15 @@
         self.cache_root = tempfile.TemporaryDirectory('vault-cache-')
         self.vault_config = {
             'storage': self.storage_config,
-            'vault_db': 'postgresql:///' + self.TEST_VAULT_DB_NAME,
-            'cache': {'root': self.cache_root.name}
+            'db': 'postgresql:///' + self.TEST_VAULT_DB_NAME,
+            'cache': {
+                'cls': 'pathslicing',
+                'args': {
+                    'root': self.cache_root.name,
+                    'slicing': '0:1/1:5',
+                    'allow_delete': True,
+                }
+            }
         }
         self.vault_backend = VaultBackend(self.vault_config)
 