diff --git a/.gitignore b/.gitignore index 52d28fb..42ba6fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ /sgloader/__pycache__/ /dataset/ *.pyc -/swh-git-loader/ /.coverage /scratch/swhgitloader.cProfile /scratch/swhgitloader.profile /scratch/save.p diff --git a/AUTHORS b/AUTHORS index 7828b71..3c44b3c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,23 +1,23 @@ Authors ======= -Below you can find a list of contributors to swh-git-loader and copyright -owners of code that has become part of swh-git-loader. They've contributed in a +Below you can find a list of contributors to swh-loader-git and copyright +owners of code that has become part of swh-loader-git. They've contributed in a variety of ways and this software wouldn't exist without them. Thank you! (For actual copyright notices, please refer to the individual source files and the Git repository.) Original authors ---------------- * Stefano Zacchiroli * Antoine R. Dumont Code contributors ----------------- * Contribute and ADD YOUR NAME HERE! diff --git a/README b/README index e633f03..bf78590 100644 --- a/README +++ b/README @@ -1,221 +1,221 @@ The Software Heritage Git Loader is a tool and a library to walk a local Git repository and inject into the SWH dataset all contained files that weren't known before. License ======= This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. 
Dependencies ============ Runtime ------- - python3 - python3-psycopg2 - python3-pygit2 Test ---- - python3-nose Requirements ============ - implementation language, Python3 - coding guidelines: conform to PEP8 - Git access: via libgit2/pygit - cache: implemented as Postgres tables Configuration ============= -swh-git-loader depends on some tools, here are the configuration files +swh-loader-git depends on some tools, here are the configuration files for those: swh-db-manager -------------- This is solely a tool in charge of db cleanup now. Create a configuration file in **\~/.config/swh/db-manager.ini** ``` {.ini} [main] # Where to store the logs -log_dir = swh-git-loader/log +log_dir = swh-loader-git/log # url access to db db_url = dbname=swhgitloader ``` See [the psycopg2 connect documentation](http://initd.org/psycopg/docs/module.html#psycopg2.connect) for the db url's schema -swh-git-loader +swh-loader-git -------------- Create a configuration file in **\~/.config/swh/git-loader.ini**: ``` {.ini} [main] # Where to store the logs -log_dir = /tmp/swh-git-loader/log +log_dir = /tmp/swh-loader-git/log # how to access the backend (remote or local) backend-type = remote # backend-type remote: url access to api rest's backend # backend-type local: configuration file to backend file .ini (cf. back.ini file) backend = http://localhost:5000 ``` Note: - [DB url DSL](http://initd.org/psycopg/docs/module.html#psycopg2.connect) - the configuration file can be changed in the CLI with the flag `-c CONFIG_FILE` or `--config CONFIG_FILE` swh-backend ----------- Backend api.
Create a configuration file in **\~/.config/swh/back.ini**: ``` {.ini} [main] # where to store blob on disk -content_storage_dir = /tmp/swh-git-loader/content-storage +content_storage_dir = /tmp/swh-loader-git/content-storage # Where to store the logs -log_dir = swh-git-loader/log +log_dir = swh-loader-git/log # url access to db: dbname= (host= port= user= password=) db_url = dbname=swhgitloader # compute folder's depth on disk aa/bb/cc/dd # folder_depth = 2 # To open to the world, 0.0.0.0 #host = 127.0.0.1 # Debugger (for dev only) debug = true # server port to listen to requests port = 6000 ``` See [the psycopg2 connect documentation](http://initd.org/psycopg/docs/module.html#psycopg2.connect) for the db url's schema Run === Environment initialization -------------------------- ``` {.bash} export PYTHONPATH=`pwd`:$PYTHONPATH ``` Backend ------- ### With initialization This depends on swh-sql repository, so: ``` {.bash} cd /path/to/swh-sql && make clean initdb DBNAME=softwareheritage-dev ``` Using the Makefile eases: ``` {.bash} make drop-db create-db run-back FOLLOW_LOG=-f ``` ### without initialization Running the backend.
``` {.bash} ./bin/swh-backend -v ``` With makefile: ``` {.bash} make run-back FOLLOW_LOG=-f ``` Help ---- ``` {.bash} -bin/swh-git-loader --help +bin/swh-loader-git --help bin/swh-db-manager --help ``` Parse a repository from a clean slate ------------------------------------- Clean and initialize the model then parse the repository git: ``` {.bash} bin/swh-db-manager cleandb -bin/swh-git-loader load /path/to/git/repo +bin/swh-loader-git load /path/to/git/repo ``` For ease: ``` {.bash} time make cleandb run REPO_PATH=~/work/inria/repo/swh-git-cloner ``` Parse an existing repository ---------------------------- ``` {.bash} -bin/swh-git-loader load /path/to/git/repo +bin/swh-loader-git load /path/to/git/repo ``` Clean data ---------- This will truncate the relevant table in the schema ``` {.bash} bin/swh-db-manager cleandb ``` For ease: ``` {.bash} make cleandb ``` Init data --------- ``` {.bash} make drop-db create-db ``` diff --git a/bin/swh-backend b/bin/swh-backend index 7f2c972..561081c 100755 --- a/bin/swh-backend +++ b/bin/swh-backend @@ -1,58 +1,58 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. 
Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os from swh.backend import api from swh.conf import reader from swh.storage.objstorage import ObjStorage # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/back.ini' # default configuration DEFAULT_CONF = { - 'content_storage_dir' : ('string', '/tmp/swh-git-loader/content-storage'), - 'log_dir' : ('string', '/tmp/swh-git-loader/log'), + 'content_storage_dir' : ('string', '/tmp/swh-loader-git/content-storage'), + 'log_dir' : ('string', '/tmp/swh-loader-git/log'), 'db_url' : ('string', 'dbname=softwareheritage-dev'), 'folder_depth' : ('int' , 4), 'debug' : ('bool' , None), 'host' : ('string', '127.0.0.1'), 'port' : ('int' , 5000) } def parse_args(): """Parse the configuration for the cli. """ cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') args = cli.parse_args() return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir', 'content_storage_dir') conf.update({ 'objstorage': ObjStorage(conf['content_storage_dir'], conf['folder_depth']) }) logging.basicConfig(filename=os.path.join(conf['log_dir'], 'back.log'), level=logging.DEBUG if args.verbose else logging.INFO) api.run(conf) diff --git a/bin/swh-db-manager b/bin/swh-db-manager index e48b0dd..b1eb35e 100755 --- a/bin/swh-db-manager +++ b/bin/swh-db-manager @@ -1,56 +1,56 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. 
Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os from swh import manager from swh.conf import reader # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/db-manager.ini' # default configuration (can be overriden by the DEFAULT_CONF_FILE) DEFAULT_CONF = { - 'log_dir': ('string', '/tmp/swh-git-loader/log'), + 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'db_url' : ('string', 'dbname=softwareheritage-dev') } def parse_args(): """Parse the configuration for the cli. """ cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') subcli = cli.add_subparsers(dest='action') subcli.add_parser('initdb', help='initialize DB') subcli.add_parser('cleandb', help='clean DB') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir') logging.basicConfig(filename=os.path.join(conf['log_dir'], 'db-manager.log'), level=logging.DEBUG if args.verbose else logging.INFO) manager.manage(args.action, conf['db_url']) diff --git a/bin/swh-git-loader b/bin/swh-git-loader index f7826c2..a4d3374 100755 --- a/bin/swh-git-loader +++ b/bin/swh-git-loader @@ -1,67 +1,67 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. 
Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os from swh.gitloader import loader from swh.conf import reader # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/git-loader.ini' # default configuration (can be overriden by the DEFAULT_CONF_FILE) DEFAULT_CONF = { - 'log_dir': ('string', '/tmp/swh-git-loader/log'), + 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'backend-type': ('string', 'remote'), 'backend': ('string', 'http://localhost:5000'), } # Another example of configuration: # DEFAULT_CONF = { -# 'log_dir': ('string', '/tmp/swh-git-loader/log'), +# 'log_dir': ('string', '/tmp/swh-loader-git/log'), # 'backend-type': ('string', 'local'), # 'backend': ('string', '~/.config/swh/back.ini'), # } def parse_args(): """Parse the CLI arguments. """ cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') subcli = cli.add_subparsers(dest='action') load_cli = subcli.add_parser('load', help='load Git repo into DB') load_cli.add_argument('repository', help='Git repository path') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir') conf['action'] = args.action conf['repo_path'] = args.repository logging.basicConfig(filename=os.path.join(conf['log_dir'], 'sgloader.log'), level=logging.DEBUG if args.verbose else logging.INFO) loader.load(conf) diff --git a/doc/api-backend-protocol.txt b/doc/api-backend-protocol.txt index 4555c64..cb5bc49 100644 --- a/doc/api-backend-protocol.txt +++ 
b/doc/api-backend-protocol.txt @@ -1,195 +1,195 @@ Design considerations ===================== # Goal Load the representation of a git, svn, cvs, tarball, et al. repository in software heritage's backend. # Nomenclature cf. swh-sql/swh.sql comments -> FIXME: find a means to compute docs from sql From this point on, `signatures` means: - the git sha1s, the sha1 and sha256 of the object's content for objects of type content - the git sha1s for all other object types (directories, contents, revisions, occurrences, releases) -A worker is one instance running swh-git-loader to parse and load a repository +A worker is one instance running swh-loader-git to parse and load a repository in the backend. It is not distributed. The backend api communicates with one or many workers. It is distributed. # Scenario In the following, we will describe with different granularities what will happen between 1 worker and the backend api. ## 1 A worker parses a repository. It sends the parsing result to the backend in multiple requests/responses. The worker sends the list of sha1s (git sha1s) encountered. The server responds with the list of unknown sha1s. The worker sends those sha1s and their associated data to the server. The server stores what it receives. ## 2 01. Worker parses local repository and builds a memory model of it. 02. HAVE: Worker sends repository's contents signatures to the backend for it to filter what it knows. 03. WANT: Backend replies with unknown contents sha1s. 04. SAVE: Worker sends all `content` data through 1 (or more) request(s). 05. SAVED: Backend stores them and finishes the transaction(s). 06. HAVE: Worker sends repository's directories' signatures to the backend for it to filter. 07. WANT: Backend replies with unknown directory sha1s. 08. SAVE: Worker sends all `directory`s' data through 1 (or more) request(s). 09. SAVED: Backend stores them and finishes the transaction(s). 10. HAVE: Worker sends repository's revisions' signatures to the backend. 11.
WANT: Backend replies with unknown revisions' sha1s. 12. SAVE: Worker sends the `revision`s' data through 1 (or more) request(s). 13. SAVED: Backend stores them and finishes the transaction(s). 14. SAVE: Worker sends repository's occurrences for the backend to save what it does not know yet. 15. SAVE: Worker sends repository's releases for the backend to save what it does not know yet. 16. Worker is done. ## 3 01. Worker parses repository and builds a data memory model. The data memory model has the following structure for each possible type: - signatures list - map indexed by git sha1, object representation. The type of object (content, directory, revision, release, occurrence) is kept. 02. Worker sends in the api backend's protocol the sha1s. 03. Api Backend receives the list of sha1s, filters out the sha1s it already knows and replies to the worker with the unknown ones. 04. Worker receives the list of unknown sha1s. The worker builds the unknowns `content`s' list. A list of contents, for each content: - git's sha1 (when parsing git repository) - sha1 content (as per content's sha1) - sha256 content - content's size - content And sends it to the api's backend. 05. Backend receives the data and: - computes from the `content` the signatures (sha1, sha256). FIXME: Not implemented yet - checks the signatures match the client's data FIXME: Not Implemented yet - Stores the content on the file storage - Persists in the db the received data If any error is detected during the process (checksums do not match, writing error, ...), the db transaction is rolled back and a failure is sent to the client. Otherwise, the db transaction is committed and a success is sent back to the client. *Note* Optimization possible: slice in multiple queries. 06. Worker receives the result from the api. If failure, worker stops. The task is done. Otherwise, the worker continues by sending the list of `directory` structures.
A list of directories, for each directory: - sha1 - directory's content - list of directory entries: - name : relative path to parent entry or root - sha1 : pointer to the object this directory points to - type : whether entry is a file or a dir - perms : unix-like permissions - atime : time of last access FIXME: Not the right time yet - mtime : time of last modification FIXME: Not the right time yet - ctime : time of last status change FIXME: Not the right time yet - directory: parent directory sha1 And sends it to the api's backend. *Note* Optimization possible: slice in multiple queries. 07. Api backend receives the data. Persists the directory's content on the file storage. Persists the directory and directory entries on the db's side with respect to the previous directories and contents stored. If any error is raised, the transaction is rolled back and an error is sent back to the client (worker). Otherwise, the transaction is committed and the success is sent back to the client. 08. Worker receives the result from the api. If failure, worker stops. The task is done. Otherwise, the worker continues by building the list of unknown `revision`s. A list of revisions, for each revision: - sha1, the revision's sha1 - revision's parent sha1s, the list of revision parents - content, the revision's content - revision's date - directory id the revision points to - message, the revision's message - author - committer And sends it to the api's backend. *Note* Optimization possible: slice in multiple queries. 09. Api backend receives data. Persists the revisions' content on the file storage. Persists the revisions on the db's side with respect to the previous directories and contents stored. If any error is raised, the transaction is rolled back and an error is sent back to the client (worker). Otherwise, the transaction is committed and the success is sent back to the client. 10. Worker receives the result. Worker sends the complete occurrences list.
A list of occurrences, for each occurrence: - sha1, the sha1 the occurrence points to - reference, the occurrence's name - url-origin, the origin of the repository 11. The backend receives the list of occurrences and persists only what it does not know. Acks the result to the worker. 12. Worker sends the complete releases list. A list of releases, for each release: - sha1, the release sha1 - content, the content of the appointed commit - revision, the sha1 the release points to - name, the release's name - date, the release's date # FIXME: find the tag's date, - author, the release's author information - comment, the release's message 13. The backend receives the list of releases and persists only what it does not know. Acks the result to the worker. 14. Worker receives the result and stops anyway. The task is done. ## Protocol details - worker serializes the content's payload (python data structure) in pickle format - backend deserializes the request's payload as a python data structure diff --git a/resources/local-git-loader.ini b/resources/local-git-loader.ini index b6fb920..da492be 100644 --- a/resources/local-git-loader.ini +++ b/resources/local-git-loader.ini @@ -1,10 +1,10 @@ [main] # Where to store the logs -log_dir = /tmp/swh-git-loader/log +log_dir = /tmp/swh-loader-git/log # how to access the backend (remote or local) backend-type = local # backend-type remote: url access to api rest's backend # backend-type local: configuration file to backend file .ini (cf.
back.ini file) backend = ~/.config/swh/back.ini diff --git a/resources/remote-git-loader.ini b/resources/remote-git-loader.ini index 3a11f59..223e9c1 100644 --- a/resources/remote-git-loader.ini +++ b/resources/remote-git-loader.ini @@ -1,10 +1,10 @@ [main] # Where to store the logs -log_dir = /tmp/swh-git-loader/log +log_dir = /tmp/swh-loader-git/log # how to access the backend (remote or local) backend-type = remote # backend-type remote: url access to api rest's backend # backend-type local: configuration file to backend file .ini (cf. back.ini file) backend = http://localhost:5000 diff --git a/resources/test/back.ini b/resources/test/back.ini index c4d2894..927957e 100644 --- a/resources/test/back.ini +++ b/resources/test/back.ini @@ -1,22 +1,22 @@ [main] # where to store blob on disk -content_storage_dir = /tmp/swh-git-loader/test/content-storage +content_storage_dir = /tmp/swh-loader-git/test/content-storage # Where to store the logs -log_dir = /tmp/swh-git-loader/test/log +log_dir = /tmp/swh-loader-git/test/log # url access to db: dbname= (host= port= user= password=) db_url = dbname=softwareheritage-dev-test # compute folder's depth on disk aa/bb/cc/dd #folder_depth = 4 # To open to the world, 0.0.0.0 #host = 127.0.0.1 # Debugger (for dev only) debug = true # server port to listen to requests port = 5001 diff --git a/swh/conf/reader.py b/swh/conf/reader.py index 0f3ee7d..f332883 100755 --- a/swh/conf/reader.py +++ b/swh/conf/reader.py @@ -1,54 +1,54 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import configparser import os _map_convert_fn = {'int': int, 'bool': lambda x: x == 'true'} # conversion per type def read(conf_file, default_conf=None): """Read the user's configuration file. Fill in the gap using `default_conf`. 
`default_conf` is similar to this: DEFAULT_CONF = { - 'a': ('string', '/tmp/swh-git-loader/log'), - 'b': ('string', 'dbname=swhgitloader') + 'a': ('string', '/tmp/swh-loader-git/log'), + 'b': ('string', 'dbname=swhloadergit') 'c': ('bool', true) 'e': ('bool', None) 'd': ('int', 10) } """ config = configparser.ConfigParser(defaults=default_conf) config.read(os.path.expanduser(conf_file)) conf = config._sections['main'] # remaining missing default configuration key are set # also type conversion is enforced for underneath layer for key in default_conf: nature_type, default_value = default_conf[key] val = conf.get(key, None) if not val: # fallback to default value conf[key] = default_value else: # value present but in string format, force type conversion conf[key] = _map_convert_fn.get(nature_type, lambda x: x)(val) return conf def prepare_folders(conf, *keys): """Prepare the folder mentioned in config under keys. """ def makedir(folder): if not os.path.exists(folder): os.makedirs(folder) for key in keys: makedir(conf[key]) diff --git a/swh/gitloader/local_store.py b/swh/gitloader/local_store.py index 7b815db..c25533f 100644 --- a/swh/gitloader/local_store.py +++ b/swh/gitloader/local_store.py @@ -1,96 +1,96 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.store import store, db, service from swh.conf import reader from swh.storage.objstorage import ObjStorage # FIXME: duplicated from bin/swh-backend... 
# Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/back.ini' # default configuration DEFAULT_CONF = { - 'content_storage_dir': ('string', '/tmp/swh-git-loader/content-storage'), - 'log_dir': ('string', '/tmp/swh-git-loader/log'), + 'content_storage_dir': ('string', '/tmp/swh-loader-git/content-storage'), + 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'db_url': ('string', 'dbname=softwareheritage-dev'), 'folder_depth': ('int', 4), 'debug': ('bool', None), 'host': ('string', '127.0.0.1'), 'port': ('int', 5000) } def store_only_new(db_conn, conf, obj_type, obj): """Store object if not already present. """ obj.update({'type': obj_type}) if not store.find(db_conn, obj): store.add(db_conn, conf, obj) _obj_to_persist_fn = {store.Type.revision: service.add_revisions} def store_unknown_objects(db_conn, conf, obj_type, swhmap): """Load objects to the backend. """ sha1s = swhmap.keys() # have: filter unknown obj unknown_obj_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) if not unknown_obj_sha1s: return True # seen: now store in backend persist_fn = _obj_to_persist_fn.get(obj_type, service.add_objects) obj_fulls = map(swhmap.get, unknown_obj_sha1s) return persist_fn(db_conn, conf, obj_type, obj_fulls) def load_to_back(conf, swh_repo): """Load to the backend the repository swh_repo. 
""" with db.connect(conf['db_url']) as db_conn: # First, store/retrieve the origin identifier # FIXME: should be done by the cloner worker (which is not yet plugged # on the right swh db ftm) service.add_origin(db_conn, swh_repo.get_origin()) # First reference all unknown persons service.add_persons(db_conn, conf, store.Type.person, swh_repo.get_persons()) res = store_unknown_objects(db_conn, conf, store.Type.content, swh_repo.get_contents()) if res: res = store_unknown_objects(db_conn, conf, store.Type.directory, swh_repo.get_directories()) if res: res = store_unknown_objects(db_conn, conf, store.Type.revision, swh_repo.get_revisions()) if res: # brutally send all remaining occurrences service.add_objects(db_conn, conf, store.Type.occurrence, swh_repo.get_occurrences()) # and releases (the idea here is that compared to existing # objects, the quantity is less) service.add_objects(db_conn, conf, store.Type.release, swh_repo.get_releases()) def prepare_and_load_to_back(backend_setup_file, swh_repo): # Read the configuration file (no check yet) conf = reader.read(backend_setup_file or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'content_storage_dir') conf.update({ 'objstorage': ObjStorage(conf['content_storage_dir'], conf['folder_depth']) }) load_to_back(conf, swh_repo) diff --git a/swh/tests/test_utils.py b/swh/tests/test_utils.py index c024319..cf98d6c 100644 --- a/swh/tests/test_utils.py +++ b/swh/tests/test_utils.py @@ -1,59 +1,59 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import time import os import shutil import tempfile from swh.backend import api from swh.storage.objstorage import ObjStorage import test_initdb def now(): """Build the date as of now in the api's format. 
""" return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) def list_files_from(root_path): """Compute the list of files from root_path. """ f = [] for (dirpath, dirnames, filenames) in os.walk(root_path): f.extend(filenames) return f def app_client(db_url="dbname=softwareheritage-dev-test"): """Setup the application ready for testing. """ - content_storage_dir = tempfile.mkdtemp(prefix='test-swh-git-loader.', + content_storage_dir = tempfile.mkdtemp(prefix='test-swh-loader-git.', dir='/tmp') folder_depth = 2 api.app.config['conf'] = {'db_url': db_url, 'content_storage_dir': content_storage_dir, - 'log_dir': '/tmp/swh-git-loader/log', + 'log_dir': '/tmp/swh-loader-git/log', 'folder_depth': folder_depth, 'debug': 'true', 'objstorage': ObjStorage(content_storage_dir, folder_depth) } api.app.config['TESTING'] = True app = api.app.test_client() test_initdb.prepare_db(db_url) return app, db_url, content_storage_dir def app_client_teardown(content_storage_dir): """Tear down app client's context. """ shutil.rmtree(content_storage_dir, ignore_errors=True)