Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9340430
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
View Options
diff --git a/swh/storage/tests/test_archiver.py b/swh/storage/tests/test_archiver.py
index 91005c27..8beca01a 100644
--- a/swh/storage/tests/test_archiver.py
+++ b/swh/storage/tests/test_archiver.py
@@ -1,323 +1,323 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import tempfile
import unittest
import os
import time
import json
from nose.tools import istest
from nose.plugins.attrib import attr
from swh.core import hashutil
from swh.core.tests.db_testing import DbsTestFixture
from server_testing import ServerTestFixture
from swh.storage.archiver import ArchiverWithRetentionPolicyDirector
from swh.storage.archiver import ArchiverWithRetentionPolicyWorker
from swh.objstorage import get_objstorage
from swh.objstorage.exc import ObjNotFoundError
from swh.objstorage.api.server import app
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DATA_DIR = os.path.join(TEST_DIR, '../../../../swh-storage-testdata')
@attr('db')
class TestArchiver(DbsTestFixture, ServerTestFixture,
unittest.TestCase):
""" Test the objstorage archiver.
"""
TEST_DB_NAMES = [
'softwareheritage-archiver-test',
]
TEST_DB_DUMPS = [
os.path.join(TEST_DATA_DIR, 'dumps/swh-archiver.dump'),
]
TEST_DB_DUMP_TYPES = [
'pg_dump',
]
def setUp(self):
# Launch the backup server
dest_root = tempfile.mkdtemp(prefix='remote')
self.config = {
'cls': 'pathslicing',
'args': {
'root': dest_root,
'slicing': '0:2/2:4/4:6',
}
}
self.app = app
super().setUp()
# Retrieve connection (depends on the order in TEST_DB_NAMES)
self.conn = self.conns[0] # archiver db's connection
self.cursor = self.cursors[0]
# Create source storage
src_root = tempfile.mkdtemp()
src_config = {
'cls': 'pathslicing',
'args': {
'root': src_root,
'slicing': '0:2/2:4/4:6'
}
}
self.src_storage = get_objstorage(**src_config)
# Create destination storage
dest_config = {
'cls': 'remote',
'args': {
- 'base_url': self.url()
+ 'url': self.url()
}
}
self.dest_storage = get_objstorage(**dest_config)
# Keep mapped the id to the storages
self.storages = {
'uffizi': self.src_storage,
'banco': self.dest_storage
}
# Override configurations
src_archiver_conf = {'host': 'uffizi'}
dest_archiver_conf = {'host': 'banco'}
src_archiver_conf.update(src_config)
dest_archiver_conf.update(dest_config)
self.archiver_storages = [src_archiver_conf, dest_archiver_conf]
self._override_director_config()
self._override_worker_config()
# Create the base archiver
self.archiver = self._create_director()
def tearDown(self):
self.empty_tables()
super().tearDown()
def empty_tables(self):
# Remove all content
self.cursor.execute('DELETE FROM content_archive')
self.conn.commit()
def _override_director_config(self, retention_policy=2):
""" Override the default config of the Archiver director
to allow the tests to use the *-test db instead of the default one as
there is no configuration file for now.
"""
ArchiverWithRetentionPolicyDirector.parse_config_file = lambda obj, additional_configs: { # noqa
'dbconn': self.conn,
'batch_max_size': 5000,
'archival_max_age': 3600,
'retention_policy': retention_policy,
'asynchronous': False,
}
def _override_worker_config(self):
""" Override the default config of the Archiver worker
to allow the tests to use the *-test db instead of the default one as
there is no configuration file for now.
"""
ArchiverWithRetentionPolicyWorker.parse_config_file = lambda obj, additional_configs: { # noqa
'retention_policy': 2,
'archival_max_age': 3600,
'dbconn': self.conn,
'storages': self.archiver_storages,
'source': 'uffizi',
}
def _create_director(self):
return ArchiverWithRetentionPolicyDirector()
def _create_worker(self, batch={}):
return ArchiverWithRetentionPolicyWorker(batch)
def _add_content(self, storage_name, content_data):
""" Add really a content to the given objstorage
This put an empty status for the added content.
Args:
storage_name: the concerned storage
content_data: the data to insert
with_row_insert: to insert a row entry in the db or not
"""
# Add the content to the storage
obj_id = self.storages[storage_name].add(content_data)
db_obj_id = r'\x' + hashutil.hash_to_hex(obj_id)
self.cursor.execute(""" INSERT INTO content_archive
VALUES('%s', '{}')
""" % (db_obj_id))
return obj_id
def _update_status(self, obj_id, storage_name, status, date=None):
""" Update the db status for the given id/storage_name.
This does not create the content in the storage.
"""
db_obj_id = r'\x' + hashutil.hash_to_hex(obj_id)
self.archiver.archiver_storage.content_archive_update(
db_obj_id, storage_name, status
)
def _add_dated_content(self, obj_id, copies={}):
""" Fully erase the previous copies field for the given content id
This does not alter the contents into the objstorages.
"""
db_obj_id = r'\x' + hashutil.hash_to_hex(obj_id)
self.cursor.execute(""" UPDATE TABLE content_archive
SET copies='%s'
WHERE content_id='%s'
""" % (json.dumps(copies), db_obj_id))
# Integration test
@istest
def archive_missing_content(self):
""" Run archiver on a missing content should archive it.
"""
obj_data = b'archive_missing_content'
obj_id = self._add_content('uffizi', obj_data)
self._update_status(obj_id, 'uffizi', 'present')
# Content is missing on banco (entry not present in the db)
try:
self.dest_storage.get(obj_id)
except ObjNotFoundError:
pass
else:
self.fail('Content should not be present before archival')
self.archiver.run()
# now the content should be present on remote objstorage
remote_data = self.dest_storage.get(obj_id)
self.assertEquals(obj_data, remote_data)
@istest
def archive_present_content(self):
""" A content that is not 'missing' shouldn't be archived.
"""
obj_id = self._add_content('uffizi', b'archive_present_content')
self._update_status(obj_id, 'uffizi', 'present')
self._update_status(obj_id, 'banco', 'present')
# After the run, the content should NOT be in the archive.
# As the archiver believe it was already in.
self.archiver.run()
with self.assertRaises(ObjNotFoundError):
self.dest_storage.get(obj_id)
@istest
def archive_already_enough(self):
""" A content missing with enough copies shouldn't be archived.
"""
obj_id = self._add_content('uffizi', b'archive_alread_enough')
self._update_status(obj_id, 'uffizi', 'present')
self._override_director_config(retention_policy=1)
director = self._create_director()
# Obj is present in only one archive but only one copy is required.
director.run()
with self.assertRaises(ObjNotFoundError):
self.dest_storage.get(obj_id)
# Unit tests for archive worker
def archival_elapsed(self, mtime):
return self._create_worker()._is_archival_delay_elapsed(mtime)
@istest
def vstatus_ongoing_remaining(self):
self.assertFalse(self.archival_elapsed(time.time()))
@istest
def vstatus_ongoing_elapsed(self):
past_time = (
time.time() - self._create_worker().archival_max_age
)
self.assertTrue(self.archival_elapsed(past_time))
def _status(self, status, mtime=None):
""" Get a dict that match the copies structure
"""
return {'status': status, 'mtime': mtime or time.time()}
@istest
def need_archival_missing(self):
""" A content should need archival when it is missing.
"""
status_copies = {'present': ['uffizi'], 'missing': ['banco']}
worker = self._create_worker()
self.assertEqual(worker.need_archival(status_copies),
True)
@istest
def need_archival_present(self):
""" A content present everywhere shouldn't need archival
"""
status_copies = {'present': ['uffizi', 'banco']}
worker = self._create_worker()
self.assertEqual(worker.need_archival(status_copies),
False)
def _compute_copies_status(self, status):
""" A content with a given status should be detected correctly
"""
obj_id = self._add_content(
'banco', b'compute_copies_' + bytes(status, 'utf8'))
self._update_status(obj_id, 'banco', status)
worker = self._create_worker()
self.assertIn('banco', worker.compute_copies(
set(worker.objstorages), obj_id)[status])
@istest
def compute_copies_present(self):
""" A present content should be detected with correct status
"""
self._compute_copies_status('present')
@istest
def compute_copies_missing(self):
""" A missing content should be detected with correct status
"""
self._compute_copies_status('missing')
def _get_backups(self, present, missing):
""" Return a list of the pair src/dest from the present and missing
"""
worker = self._create_worker()
return list(worker.choose_backup_servers(present, missing))
@istest
def choose_backup_servers(self):
self.assertEqual(len(self._get_backups(['uffizi', 'banco'], [])), 0)
self.assertEqual(len(self._get_backups(['uffizi'], ['banco'])), 1)
# Even with more possible destinations, do not take more than the
# retention_policy require
self.assertEqual(
len(self._get_backups(['uffizi'], ['banco', 's3'])),
1
)
# This cannot be tested with ArchiverWithRetentionPolicyDirector
# (it reads from archiver db)
# @istest
# def archive_missing_content__without_row_entry_in_archive_db(self):
# """ Run archiver on a missing content should archive it.
# """
# obj_data = b'archive_missing_content_without_row_entry_in_archive_db'
# obj_id = self._add_content('uffizi', obj_data)
# # One entry in archiver db but no status about its whereabouts
# # Content is actually missing on banco but present on uffizi
# try:
# self.dest_storage.get(obj_id)
# except ObjNotFoundError:
# pass
# else:
# self.fail('Content should not be present before archival')
# self.archiver.run()
# # now the content should be present on remote objstorage
# remote_data = self.dest_storage.get(obj_id)
# self.assertEquals(obj_data, remote_data)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 10:39 AM (4 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3322247
Attached To
rDSTO Storage manager
Event Timeline
Log In to Comment