Page MenuHomeSoftware Heritage

D1181.diff
No OneTemporary

D1181.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -2,3 +2,50 @@
==============
Content-addressable object storage for the Software Heritage project.
+
+
+Quick start
+-----------
+
+The easiest way to try the swh-objstorage object storage is to install it in a
+virtualenv. Here, we will be using
+[virtualenvwrapper](https://virtualenvwrapper.readthedocs.io), but any
+virtualenv tool should work the same way.
+
+In the example below we will create a new objstorage using the
+[pathslicer](https://docs.softwareheritage.org/devel/apidoc/swh.objstorage.html#module-swh.objstorage.objstorage_pathslicing)
+backend.
+
+
+```
+~/swh$ mkvirtualenv -p /usr/bin/python3 -i swh.objstorage swh-objstorage
+[...]
+(swh-objstorage) ~/swh$ cat >local.yml <<EOF
+objstorage:
+  cls: pathslicing
+  args:
+    root: /tmp/objstorage
+    slicing: 0:2/2:4/4:6
+EOF
+(swh-objstorage) ~/swh$ mkdir /tmp/objstorage
+(swh-objstorage) ~/swh$ swh-objstorage -C local.yml serve -p 15003
+INFO:swh.core.config:Loading config file local.yml
+======== Running on http://0.0.0.0:15003 ========
+(Press CTRL+C to quit)
+```
+
+Now we have an API listening on http://0.0.0.0:15003 that we can use to store
+and retrieve objects. In another terminal:
+
+```
+~/swh$ workon swh-objstorage
+(swh-objstorage) ~/swh$ cat >remote.yml <<EOF
+objstorage:
+  cls: remote
+  args:
+    url: http://127.0.0.1:15003
+EOF
+(swh-objstorage) ~/swh$ swh-objstorage -C remote.yml import .
+INFO:swh.core.config:Loading config file remote.yml
+Imported 1369 files for a volume of 722837 bytes in 2 seconds
+```
diff --git a/bin/swh-objstorage-add-dir b/bin/swh-objstorage-add-dir
deleted file mode 100755
--- a/bin/swh-objstorage-add-dir
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (C) 2015 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import logging
-import os
-import sys
-
-from swh.storage import objstorage
-
-if __name__ == '__main__':
- try:
- root_dir = sys.argv[1]
- dirname = sys.argv[2]
- except IndexError:
- print("Usage: swh-objstorage-add-dir OBJ_STORAGE_DIR DATA_DIR")
- sys.exit(1)
-
- logging.basicConfig(level=logging.INFO)
-
- objs = objstorage.ObjStorage(root_dir)
-
- dups = 0
- for root, _dirs, files in os.walk(dirname):
- for name in files:
- path = os.path.join(root, name)
- with open(path, 'rb') as f:
- try:
- objs.add(f.read())
- except objstorage.DuplicateObjError:
- dups += 1
-
- if dups:
- logging.info('skipped %d duplicate(s) file(s)' % dups)
diff --git a/bin/swh-objstorage-fsck b/bin/swh-objstorage-fsck
deleted file mode 100755
--- a/bin/swh-objstorage-fsck
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (C) 2015 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import logging
-import sys
-
-from swh.storage import objstorage
-
-if __name__ == '__main__':
- try:
- root_dir = sys.argv[1]
- except IndexError:
- print("Usage: swh-objstorage-add-dir OBJ_STORAGE_DIR")
- sys.exit(1)
-
- logging.basicConfig(level=logging.INFO)
-
- objs = objstorage.ObjStorage(root_dir)
-
- for obj_id in objs:
- try:
- objs.check(obj_id)
- except objstorage.Error as err:
- logging.error(err)
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -44,10 +44,6 @@
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/diffusion/DOBJS',
packages=find_packages(),
- scripts=[
- 'bin/swh-objstorage-add-dir',
- 'bin/swh-objstorage-fsck'
- ], # scripts to package
install_requires=parse_requirements() + parse_requirements('swh'),
setup_requires=['vcversioner'],
extras_require={'testing': parse_requirements('test')},
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -3,25 +3,81 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import os
+import logging
+import time
+
import click
import aiohttp.web
+from swh.objstorage import get_objstorage
from swh.objstorage.api.server import load_and_check_config, make_app
+CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+@click.option('--config-file', '-C', default=None,
+ type=click.Path(exists=True, dir_okay=False,),
+ help="Configuration file.")
+@click.option('--log-level', '-l', default='INFO',
+ type=click.Choice(logging._nameToLevel.keys()),
+ help="Log level (default to INFO)")
+@click.pass_context
+def cli(ctx, config_file, log_level):
+ ctx.ensure_object(dict)
+ logging.basicConfig(level=log_level)
+ cfg = load_and_check_config(config_file)
+ ctx.obj['config'] = cfg
+ ctx.obj['log_level'] = log_level
-@click.command()
-@click.argument('config-path', required=1)
+
+@cli.command('serve')
@click.option('--host', default='0.0.0.0', help="Host to run the server")
-@click.option('--port', default=5007, type=click.INT,
+@click.option('--port', '-p', default=5007, type=click.INT,
help="Binding port of the server")
-@click.option('--debug/--nodebug', default=True,
- help="Indicates if the server should run in debug mode")
-def main(config_path, host, port, debug):
- cfg = load_and_check_config(config_path)
- app = make_app(cfg)
- app.update(debug=bool(debug))
+@click.pass_context
+def serve(ctx, host, port):
+ app = make_app(ctx.obj['config'])
+ if ctx.obj['log_level'] == 'DEBUG':
+ app.update(debug=True)
aiohttp.web.run_app(app, host=host, port=int(port))
+@cli.command('import')
+@click.argument('directory', required=True, nargs=-1)
+@click.pass_context
+def import_directories(ctx, directory):
+ objstorage = get_objstorage(**ctx.obj['config']['objstorage'])
+ nobj = 0
+ volume = 0
+ t0 = time.time()
+ for dirname in directory:
+ for root, _dirs, files in os.walk(dirname):
+ for name in files:
+ path = os.path.join(root, name)
+ with open(path, 'rb') as f:
+ objstorage.add(f.read())
+ volume += os.stat(path).st_size
+ nobj += 1
+ click.echo('Imported %d files for a volume of %s bytes in %d seconds' %
+ (nobj, volume, time.time()-t0))
+
+
+@cli.command('fsck')
+@click.pass_context
+def fsck(ctx):
+ objstorage = get_objstorage(**ctx.obj['config']['objstorage'])
+ for obj_id in objstorage:
+ try:
+ objstorage.check(obj_id)
+ except objstorage.Error as err:
+ logging.error(err)
+
+
+def main():
+ return cli(auto_envvar_prefix='SWH_OBJSTORAGE')
+
+
if __name__ == '__main__':
main()

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 7:58 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215099

Event Timeline