#!/usr/bin/env python3

# Copyright (C) 2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

# Expected use:
#  find /srv/softwareheritage/objects/ -type f | \
#    ./bin/swh-storage-backup-client \
#      --remote-storage http://objstorage-backup:5000/ \
#      --local-storage-path /tmp/storage | \
#    tee -a banco-backuped-content


import click
import os
import sys

from swh.core.hashutil import hex_to_hash
from swh.objstorage.api.client import RemoteObjStorage
from swh.objstorage.objstorage_pathslicing import PathSlicingObjStorage


@click.command()
@click.option('--remote-storage',
              default='http://localhost:5000/',
              help='Remote storage to use.')
@click.option('--local-storage-path',
              default='/tmp/storage',
              help='Local storage')
@click.option('--root-slicing',
              default='0:2/2:4/4:6',
              help='Slicing scheme of the local storage')
def main(remote_storage, local_storage_path, root_slicing):
    """Send to the remote storage the objects listed on stdin that fail
    its content check.

    One object file path is read per line from standard input; the
    basename of each path is taken as the hexadecimal object id. Objects
    the remote storage already checks successfully are skipped. The
    others are read from the local storage and added to the remote one,
    and their hexadecimal id is printed on standard output.
    """
    # Instantiate the remote obj storage client
    client = RemoteObjStorage(remote_storage)
    local_storage = PathSlicingObjStorage(local_storage_path, root_slicing)

    # Read object file paths from stdin, one per line. Expects something
    # like '/srv/softwareheritage/objects/00/00/ff/<hex object id>'
    for filepath in sys.stdin:
        filepath = filepath.rstrip()
        if not filepath:
            # Blank line: there is no object id to extract from it
            continue

        obj_id_hex = os.path.basename(filepath)
        obj_id = hex_to_hash(obj_id_hex)

        try:
            client.content_check(obj_id)
        except Exception:
            # api raises exception when check fails (either not found
            # or corruption). Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still stop the backup.
            pass
        else:
            continue  # skip known and correct content

        # Deal with either unknown or incorrect content from
        # the remote storage's standpoint
        obj_content = local_storage.get(obj_id)
        client.content_add(obj_content, obj_id)

        print(obj_id_hex)


if __name__ == '__main__':
    main()