diff --git a/swh/storage/checker/checker.py b/swh/storage/checker/checker.py --- a/swh/storage/checker/checker.py +++ b/swh/storage/checker/checker.py @@ -14,7 +14,7 @@ DEFAULT_CONFIG = { 'storage_path': ('str', '/srv/softwareheritage/objects'), 'storage_depth': ('int', 3), - 'backup_url': ('str', 'http://uffizi:5002'), + 'backup_url': ('str', 'http://uffizi:5002/'), 'batch_size': ('int', 1000), } @@ -58,7 +58,6 @@ for content_id in self.get_content_to_check(batch_size): if not self.check_content(content_id): - self.invalidate_content(content_id) corrupted_contents.append(content_id) logging.error('The content', content_id, 'have been corrupted') @@ -108,11 +107,11 @@ @click.command() @click.argument('config-path', required=1) -@click.option('--storage-path', default=DEFAULT_CONFIG['storage_path'], +@click.option('--storage-path', default=DEFAULT_CONFIG['storage_path'][1], help='Path to the storage to verify') -@click.option('--depth', default=DEFAULT_CONFIG['storage_depth'], +@click.option('--depth', default=DEFAULT_CONFIG['storage_depth'][1], type=click.INT, help='Depth of the object storage') -@click.option('--backup-url', default=DEFAULT_CONFIG['backup_url'], +@click.option('--backup-url', default=DEFAULT_CONFIG['backup_url'][1], help='Url of a remote storage to retrieve corrupted content') def launch(config_path, storage_path, depth, backup_url): # The configuration have following priority : @@ -128,7 +127,10 @@ checker = ContentChecker( {'batch_size': conf['batch_size']}, conf['storage_path'], - conf['depth'], + conf['storage_depth'], conf['backup_url'] ) checker.run() + +if __name__ == '__main__': + launch() diff --git a/swh/storage/objstorage/objstorage.py b/swh/storage/objstorage/objstorage.py --- a/swh/storage/objstorage/objstorage.py +++ b/swh/storage/objstorage/objstorage.py @@ -331,7 +331,8 @@ path = os.path.join(self._root_dir, *dirs) content_list = next(os.walk(path))[2] length = min(batch_size, len(content_list)) - return length, random.sample(content_list, length) + return length, map(hashutil.hex_to_hash, + random.sample(content_list, length)) while batch_size: length, it = get_random_content(self, batch_size)