#!/usr/bin/env python3

# NOT FOR PRODUCTION (does not use the stable swh storage api yet)

# Copyright (C) 2015  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import argparse
import os
import shutil
import tempfile

from swh.core import hashutil, config


# Values of the 'perms' field that the materialisation loop recognises.
# NOTE(review): these are *decimal* literals that merely look like git's
# octal tree modes (040000 / 100644) -- confirm against how the storage
# backend actually encodes 'perms' before changing them.
_PERMS_DIRECTORY = 40000
_PERMS_REGULAR_FILE = 100644


def escape_hash(sha1):
    """Return sha1 prefixed with a literal backslash-x.

    If sha1 is a bytes object it is first converted to its hexadecimal
    string form with hashutil.hash_to_hex; any other value is formatted
    as is.

    """
    if isinstance(sha1, bytes):
        sha1 = hashutil.hash_to_hex(sha1)
    return '\\x%s' % sha1


def upper_directory_from(revision_id):
    """Return the directory the revision with id revision_id points to.

    Only the first revision yielded by storage.revision_get is looked at.
    Returns None when the storage yields nothing for that id.

    Relies on the module-level `storage` object, which is only created
    when this file is run as a script.

    """
    for revision in storage.revision_get([revision_id]):
        return revision['directory']
    return None


def directory_ls(directory_id, recursive=True):
    """List entries from directory.

    Yields one dict per entry returned by storage.directory_get, with the
    keys 'dir_id', 'type', 'target', 'name' and 'perms'. 'dir_id' and
    'target' are converted to hex strings and 'name' is decoded as utf-8.

    Relies on the module-level `storage` object, which is only created
    when this file is run as a script.

    """
    entries = storage.directory_get(directory_id, recursive=recursive)
    for entry in entries:
        yield {'dir_id': hashutil.hash_to_hex(entry[0]),
               'type': entry[1],
               'target': hashutil.hash_to_hex(entry[2]),
               'name': entry[3].decode('utf-8'),
               'perms': entry[4]}


def build_archive_from_revision(revision_id):
    """Recreate on disk the tree of the directory revision_id points to.

    The tree is materialised inside a fresh temporary directory under
    /tmp: directory entries become directories (mode 0o755), regular file
    entries become *empty* files (mode 0o644), and every other entry is
    skipped. A summary line is printed, then the temporary directory is
    removed again -- also when an error interrupts the walk.

    """
    directory_id = upper_directory_from(revision_id)
    print('directory id: %s' % hashutil.hash_to_hex(directory_id))

    tmpdir = tempfile.mkdtemp(suffix='create-tarball',
                              prefix='swh.loader.tar',
                              dir='/tmp')

    # try/finally so the scratch tree never outlives this call, whatever
    # the storage or the filesystem raises in between.
    try:
        count = 0
        tarball_name = ''
        for entry in directory_ls(directory_id, recursive=True):
            name = entry['name']
            perms = entry['perms']

            path = os.path.join(tmpdir, name)
            if count == 0:
                tarball_name = path  # order in the query

            if perms == _PERMS_DIRECTORY:
                os.makedirs(path, exist_ok=True)
                os.chmod(path, 0o755)
            elif perms == _PERMS_REGULAR_FILE:
                dirpath = os.path.dirname(path)
                os.makedirs(dirpath, exist_ok=True)
                os.chmod(dirpath, 0o755)

                # Create (or truncate) the file and close it right away
                # instead of leaking the handle.
                with open(path, 'w'):
                    pass  # FIXME: add content to it...
                os.chmod(path, 0o644)
            else:
                # Anything that is neither of the two recognised perms
                # values ends up here and is not counted.
                print('skip link for the moment...')  # FIXME: need content
                continue

            print(perms, path)
            count += 1

        print('tarball %s: %s entries' % (tarball_name, count))
    finally:
        shutil.rmtree(tmpdir)


def parse_args():
    """Parse the configuration from the cli.

    Returns the argparse namespace; its only option is --config-file
    (short form -c).

    """
    cli = argparse.ArgumentParser(
        description='Tarball creation from swh-storage.')
    cli.add_argument('--config-file', '-c', help='configuration file')

    args = cli.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()
    conf = config.read(args.config_file)

    # Pick the storage implementation named by the configuration.
    if conf['storage_class'] == 'remote_storage':
        from swh.storage.api.client import RemoteStorage as Storage
    else:
        from swh.storage import Storage

    # Module-level on purpose: upper_directory_from and directory_ls read
    # this global.
    storage = Storage(conf['storage_args'])

    revision_id = hashutil.hex_to_hash(
        '7c82241cb2a564c79f2930ac9416800bbb2a6d3e')
    build_archive_from_revision(revision_id)