diff --git a/AUTHORS b/AUTHORS index 3c44b3c..72ac9eb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,23 +1,24 @@ Authors ======= Below you can find a list of contributors to swh-loader-git and copyright owners of code that has become part of swh-loader-git. They've contributed in a variety of ways and this software wouldn't exist without them. Thank you! (For actual copyright notices, please refer to the individual source files and the Git repository.) Original authors ---------------- * Stefano Zacchiroli * Antoine R. Dumont +* Nicolas Dandrimont Code contributors ----------------- * Contribute and ADD YOUR NAME HERE! diff --git a/Makefile.local b/Makefile.local deleted file mode 100644 index 091bf4b..0000000 --- a/Makefile.local +++ /dev/null @@ -1,95 +0,0 @@ -# -*- makefile -*- - -FLAKE = flake8 -BINDIR = bin -SRCDIR = swh -REPO_PATH=./swh-loader-git-testdata - -# add -v for example -FLAG= - -DB=softwareheritage-dev - -SWH_LOADER=$(BINDIR)/swh-loader-git -SWH_DB_MANAGER=$(BINDIR)/swh-db-manager -SWH_BACK=$(BINDIR)/swh-backend - -SQL_FOLDER=../swh-storage/sql/ - -# could use cProfile -PROFILE_TYPE=profile - -FOLLOW_LOG=-f - -# Adapt python-path to use other swh modules -_PYPATH=`pwd`:`pwd`/../swh-core:`pwd`/../swh-storage - -deps: - apt-get install -y \ - python3 \ - python3-pygit2 \ - python3-psycopg2 \ - python3-nose \ - python3-flask \ - python3-requests \ - python3-retrying \ - ipython3 - -cover: - PYTHONPATH=$(_PYPATH) make coverage - -clean: - rm -rf /tmp/swh-loader-git/content-storage - -prepare: - mkdir -p /tmp/swh-loader-git/content-storage - -cleandb: clean - PYTHONPATH=$(_PYPATH) $(SWH_DB_MANAGER) $(FLAG) cleandb - -run-remote: - PYTHONPATH=$(_PYPATH) $(SWH_LOADER) $(FLAG) --config ./resources/remote-loader-git.ini load $(REPO_PATH) - -run-local: - PYTHONPATH=$(_PYPATH) $(SWH_LOADER) $(FLAG) --config ./resources/local-loader-git.ini load $(REPO_PATH) - -run: - # works with the default ~/.config/swh/loader-git.ini file - PYTHONPATH=$(_PYPATH) $(SWH_LOADER) $(FLAG) load $(REPO_PATH) - -run-back: - PYTHONPATH=$(_PYPATH) $(SWH_BACK) $(FLAG) - -connect-db: - psql -d $(DB) - -create-db: - cd $(SQL_FOLDER) && make clean initdb - -drop-db: - cd $(SQL_FOLDER) && make clean dropdb - -check-meta: - @echo "Repository: $(REPO_PATH)" - - @echo "Git metadata:" - @$(BINDIR)/dir-git-repo-meta.sh $(REPO_PATH) - @echo - - @echo "DB metadata:" - @$(BINDIR)/db-git-repo-meta.sh $(DB) - @echo - -log-loader: - tail $(FOLLOW_LOG) /tmp/swh-loader-git/log/sgloader.log - -log-back: - tail $(FOLLOW_LOG) /tmp/swh-loader-git/log/back.log - -profile-run: - PYTHONPATH=$(_PYPATH) python3 -m $(PROFILE_TYPE) -o ./scratch/swhgitloader.$(PROFILE_TYPE) ./scratch/profile-swhgitloader.py - -profile-stats: - PYTHONPATH=$(_PYPATH) ./scratch/analyse-profile.py - -include Makefile.tests diff --git a/Makefile.tests b/Makefile.tests deleted file mode 100644 index ee3627b..0000000 --- a/Makefile.tests +++ /dev/null @@ -1,83 +0,0 @@ -# -*- makefile -*- -NOSEFLAGS=--nologcapture -v -DB_TEST=$(DB)-test -TESTDIR = ./swh/loader/git/tests - -test-connect-db: - psql $(DB_TEST) - -test-create-db: - cd $(SQL_FOLDER) && make clean initdb DBNAME=$(DB_TEST) - -test-drop-db: - cd $(SQL_FOLDER) && make clean dropdb DBNAME=$(DB_TEST) - -test-cleandb: - PYTHONPATH=$(_PYPATH) $(SWH_DB_MANAGER) $(FLAG) --config ./resources/test/db-manager.ini cleandb - -test-clean: - rm -rf /tmp/swh-loader-git/test/ - -test-prepare: - mkdir -p /tmp/swh-loader-git/test/ - -test-log-back: - tail $(FOLLOW_LOG) /tmp/swh-loader-git/test/log/back.log - -test-check-meta: - @echo "DB $(DB_TEST) metadata:" - @$(BINDIR)/db-git-repo-meta.sh $(DB_TEST) - @echo - -tests: - PYTHONPATH=$(_PYPATH) make test - -test-run-back: - PYTHONPATH=$(_PYPATH) $(SWH_BACK) $(FLAG) --config ./resources/test/back.ini - -test-http: - $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_http.py - -test-swhrepo: - $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_swhrepo.py - -test-api: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api*.py - -test-api-post-per-type: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_post_*.py - -test-api-content: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_content.py - -test-api-directory: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_directory.py - -test-api-revision: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_revision.py - -test-api-release: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_release.py - -test-api-occurrence: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_occurrence.py - -test-api-home: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_home.py - -test-api-origin: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_origin.py - -test-api-person: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_person.py - -test-file: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_file.py - -test-remote-loader: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_remote_loader.py - -test-local-loader: - PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_local_loader.py - -test-loaders: test-local-loader test-remote-loader diff --git a/bin/db-git-repo-meta.sh b/bin/db-git-repo-meta.sh deleted file mode 100755 index 754e062..0000000 --- a/bin/db-git-repo-meta.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# Use: $0 -# will compute the number of revision, directory, content from db respectively. - -DB=$1 - -count() { - DB=$1 - QUERY=$2 - psql -d $1 --command "$QUERY;" | tail -3 | head -1 -} - -NB_CONTENTS=$(count $DB "select count(*) from content;") -NB_DIRECTORIES=$(count $DB "select count(*) from directory;") -NB_DIRECTORY_ENTRIES=$(count $DB "select count(*) from directory_entry;") -NB_REVISIONS=$(count $DB "select count(*) from revision;") -NB_RELEASES=$(count $DB "select count(*) from release;") -NB_PERSONS=$(count $DB "select count(*) from person;") - -cat<, -# Antoine R. Dumont -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import argparse -import logging -import os - -from swh.loader.git.backend import api -from swh.loader.git.conf import reader -from swh.storage.objstorage import ObjStorage - - -# Default configuration file -DEFAULT_CONF_FILE = '~/.config/swh/back.ini' - - -# default configuration -DEFAULT_CONF = { - 'content_storage_dir' : ('string', '/tmp/swh-loader-git/content-storage'), - 'log_dir' : ('string', '/tmp/swh-loader-git/log'), - 'db_url' : ('string', 'dbname=softwareheritage-dev'), - 'folder_depth' : ('int' , 4), - 'debug' : ('bool' , None), - 'host' : ('string', '127.0.0.1'), - 'port' : ('int' , 5000) -} - - -def parse_args(): - """Parse the configuration for the cli. - """ - cli = argparse.ArgumentParser( - description='Parse git repository objects to load them into DB.') - cli.add_argument('--verbose', '-v', action='store_true', - help='Verbosity level in log file.') - cli.add_argument('--config', '-c', help='configuration file path') - - args = cli.parse_args() - - return args - - -if __name__ == '__main__': - args = parse_args() - conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) - reader.prepare_folders(conf, 'log_dir', 'content_storage_dir') - conf.update({ - 'objstorage': ObjStorage(conf['content_storage_dir'], - conf['folder_depth']) - }) - logging.basicConfig(filename=os.path.join(conf['log_dir'], 'back.log'), - level=logging.DEBUG if args.verbose else logging.INFO) - api.run(conf) diff --git a/bin/swh-db-manager b/bin/swh-db-manager deleted file mode 100755 index a690114..0000000 --- a/bin/swh-db-manager +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2015 Stefano Zacchiroli , -# Antoine R. Dumont -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import argparse -import logging -import os - - -from swh.loader.git import manager -from swh.loader.git.conf import reader - -# Default configuration file -DEFAULT_CONF_FILE = '~/.config/swh/db-manager.ini' - - -# default configuration (can be overriden by the DEFAULT_CONF_FILE) -DEFAULT_CONF = { - 'log_dir': ('string', '/tmp/swh-loader-git/log'), - 'db_url' : ('string', 'dbname=softwareheritage-dev') -} - - -def parse_args(): - """Parse the configuration for the cli. - """ - cli = argparse.ArgumentParser( - description='Parse git repository objects to load them into DB.') - cli.add_argument('--verbose', '-v', action='store_true', - help='Verbosity level in log file.') - cli.add_argument('--config', '-c', help='configuration file path') - - subcli = cli.add_subparsers(dest='action') - subcli.add_parser('initdb', help='initialize DB') - subcli.add_parser('cleandb', help='clean DB') - - args = cli.parse_args() - if not args.action: - cli.error('no action given') - - return args - - -if __name__ == '__main__': - args = parse_args() - conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) - reader.prepare_folders(conf, 'log_dir') - - logging.basicConfig(filename=os.path.join(conf['log_dir'], 'db-manager.log'), - level=logging.DEBUG if args.verbose else logging.INFO) - - manager.manage(args.action, conf['db_url']) diff --git a/bin/swh-loader-git b/bin/swh-loader-git index 3d6ae75..f6ab509 100755 --- a/bin/swh-loader-git +++ b/bin/swh-loader-git @@ -1,67 +1,40 @@ #!/usr/bin/env python3 -# Copyright (C) 2015 Stefano Zacchiroli , -# Antoine R. Dumont -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import argparse import logging -import os - -from swh.loader.git import loader -from swh.loader.git.conf import reader - -# Default configuration file -DEFAULT_CONF_FILE = '~/.config/swh/loader-git.ini' - - -# default configuration (can be overriden by the DEFAULT_CONF_FILE) -DEFAULT_CONF = { - 'log_dir': ('string', '/tmp/swh-loader-git/log'), - 'backend-type': ('string', 'remote'), - 'backend': ('string', 'http://localhost:5000'), +import sys + +import pygit2 + +from swh.core import config +from swh.loader.git.git import BulkLoader + +DEFAULT_CONFIG = { + 'storage_args': ('list[str]', ['http://localhost:5000/']), + 'storage_class': ('str', 'remote_storage'), + 'repo_path': ('str', None), + + 'origin': ('int', -1), + 'authority': ('int', 1), + 'validity': ('str', '2015-01-01 00:00:00+00'), + + 'create_origin': ('bool', True), + 'send_contents': ('bool', True), + 'send_directories': ('bool', True), + 'send_revisions': ('bool', True), + 'send_releases': ('bool', True), + 'send_occurrences': ('bool', True), + + 'content_packet_size': ('int', 100000), + 'directory_packet_size': ('int', 25000), + 'revision_packet_size': ('int', 100000), + 'release_packet_size': ('int', 100000), + 'occurrence_packet_size': ('int', 100000), } -# Another example of configuration: -# DEFAULT_CONF = { -# 'log_dir': ('string', '/tmp/swh-loader-git/log'), -# 'backend-type': ('string', 'local'), -# 'backend': ('string', '~/.config/swh/back.ini'), -# } - - -def parse_args(): - """Parse the CLI arguments. - """ - cli = argparse.ArgumentParser( - description='Parse git repository objects to load them into DB.') - cli.add_argument('--verbose', '-v', action='store_true', - help='Verbosity level in log file.') - cli.add_argument('--config', '-c', help='configuration file path') - - subcli = cli.add_subparsers(dest='action') - - load_cli = subcli.add_parser('load', help='load Git repo into DB') - load_cli.add_argument('repository', help='Git repository path') - - args = cli.parse_args() - if not args.action: - cli.error('no action given') - - return args - - -if __name__ == '__main__': - args = parse_args() - conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) - reader.prepare_folders(conf, 'log_dir') - - conf['action'] = args.action - conf['repo_path'] = args.repository +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(message)s') - logging.basicConfig(filename=os.path.join(conf['log_dir'], 'sgloader.log'), - level=logging.DEBUG if args.verbose else logging.INFO) +my_config = config.read(sys.argv[1], DEFAULT_CONFIG) - loader.load(conf) +loader = BulkLoader(my_config) +loader.process() diff --git a/bin/test-bulk-loader-all b/bin/swh-loader-git-multi similarity index 100% rename from bin/test-bulk-loader-all rename to bin/swh-loader-git-multi diff --git a/bin/test-bulk-loader b/bin/test-bulk-loader deleted file mode 100755 index f6ab509..0000000 --- a/bin/test-bulk-loader +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -import logging -import sys - -import pygit2 - -from swh.core import config -from swh.loader.git.git import BulkLoader - -DEFAULT_CONFIG = { - 'storage_args': ('list[str]', ['http://localhost:5000/']), - 'storage_class': ('str', 'remote_storage'), - 'repo_path': ('str', None), - - 'origin': ('int', -1), - 'authority': ('int', 1), - 'validity': ('str', '2015-01-01 00:00:00+00'), - - 'create_origin': ('bool', True), - 'send_contents': ('bool', True), - 'send_directories': ('bool', True), - 'send_revisions': ('bool', True), - 'send_releases': ('bool', True), - 'send_occurrences': ('bool', True), - - 'content_packet_size': ('int', 100000), - 'directory_packet_size': ('int', 25000), - 'revision_packet_size': ('int', 100000), - 'release_packet_size': ('int', 100000), - 'occurrence_packet_size': ('int', 100000), -} - -logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(message)s') - -my_config = config.read(sys.argv[1], DEFAULT_CONFIG) - -loader = BulkLoader(my_config) -loader.process() diff --git a/doc/api-backend-protocol.txt b/doc/attic/api-backend-protocol.txt similarity index 100% rename from doc/api-backend-protocol.txt rename to doc/attic/api-backend-protocol.txt diff --git a/doc/git-loading-design.txt b/doc/attic/git-loading-design.txt similarity index 100% rename from doc/git-loading-design.txt rename to doc/attic/git-loading-design.txt diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 638ef17..0000000 --- a/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[flake8] -# ignore = E226,E302,E41 -# max-line-length = 79 -exclude = swh/tests/* -# max-complexity = 10 -# source: http://flake8.readthedocs.org/en/latest/config.html?highlight=ignore diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/backend/__init__.py b/swh/loader/git/backend/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/git/backend/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/backend/api.py b/swh/loader/git/backend/api.py deleted file mode 100755 index bbb3859..0000000 --- a/swh/loader/git/backend/api.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import logging - -from flask import Flask, Response, make_response, request - -from swh.loader.git.storage import storage, db, service -from swh.loader.git.protocols import serial - - -# api's definition -app = Flask(__name__) - - -def read_request_payload(request): - """Read the request's payload. - """ # TODO: Check the signed pickled data? - return serial.load(request.stream) - - -def write_response(data): - """Write response from data. - """ - return Response(serial.dumps(data), mimetype=serial.MIMETYPE) - - -@app.route('/') -def hello(): - """A simple api to define what the server is all about. - FIXME: A redirect towards a static page defining the routes would be nice. - """ - return 'Dev SWH API' - - -# from uri to type -_uri_types = {'revisions': storage.Type.revision, - 'directories': storage.Type.directory, - 'contents': storage.Type.content, - 'releases': storage.Type.release, - 'occurrences': storage.Type.occurrence, - 'persons': storage.Type.person} - - -def _do_action_with_payload(conf, action_fn, uri_type, id, map_result_fn): - uri_type_ok = _uri_types.get(uri_type, None) - if uri_type_ok is None: - return make_response('Bad request!', 400) - - vcs_object = read_request_payload(request) - vcs_object.update({'id': id, - 'type': uri_type_ok}) - return action_fn(conf, vcs_object, map_result_fn) - - -# occurrence type is not dealt the same way -_post_all_uri_types = {'revisions': storage.Type.revision, - 'directories': storage.Type.directory, - 'contents': storage.Type.content} - - -@app.route('/vcs//', methods=['POST']) -def filter_unknowns_type(uri_type): - """Filters unknown sha1 to the backend and returns them. - """ - if request.headers.get('Content-Type') != serial.MIMETYPE: - return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) - - obj_type = _post_all_uri_types.get(uri_type) - if obj_type is None: - return make_response('Bad request. Type not supported!', 400) - - sha1s = read_request_payload(request) - config = app.config['conf'] - - with db.connect(config['db_url']) as db_conn: - unknowns_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) - if unknowns_sha1s is None: - return make_response('Bad request!', 400) - else: - return write_response(unknowns_sha1s) - -@app.route('/vcs/persons/', methods=['POST']) -def post_person(): - """Find a person. - """ - if request.headers.get('Content-Type') != serial.MIMETYPE: - return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) - - origin = read_request_payload(request) - config = app.config['conf'] - - with db.connect(config['db_url']) as db_conn: - try: - person_found = service.find_person(db_conn, origin) - if person_found: - return write_response(person_found) - else: - return make_response('Person not found!', 404) - except: - return make_response('Bad request!', 400) - - -@app.route('/origins/', methods=['POST']) -def post_origin(): - """Find an origin. - """ - if request.headers.get('Content-Type') != serial.MIMETYPE: - return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) - - origin = read_request_payload(request) - config = app.config['conf'] - - with db.connect(config['db_url']) as db_conn: - try: - origin_found = service.find_origin(db_conn, origin) - if origin_found: - return write_response(origin_found) - else: - return make_response('Origin not found!', 404) - except: - return make_response('Bad request!', 400) - - -@app.route('/origins/', methods=['PUT']) -def put_origin(): - """Create an origin or returns it if already existing. - """ - if request.headers.get('Content-Type') != serial.MIMETYPE: - return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) - - origin = read_request_payload(request) - config = app.config['conf'] - - with db.connect(config['db_url']) as db_conn: - try: - origin_found = service.add_origin(db_conn, origin) - return write_response(origin_found) # FIXME: 204 - except: - return make_response('Bad request!', 400) - - -@app.route('/vcs//', methods=['PUT']) -def put_all(uri_type): - """Store or update given objects (uri_type in {contents, directories, releases). - """ - if request.headers.get('Content-Type') != serial.MIMETYPE: - return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) - - payload = read_request_payload(request) - obj_type = _uri_types[uri_type] - - config = app.config['conf'] - - with db.connect(config['db_url']) as db_conn: - service.persist(db_conn, config, obj_type, payload) - - return make_response('Successful creation!', 204) - - -def add_object(config, vcs_object, map_result_fn): - """Add object in storage. - - config is the configuration needed for the backend to execute query - - vcs_object is the object to look for in the backend - - map_result_fn is a mapping function which takes the backend's result - and transform its output accordingly. - - This function returns an http response of the result. - """ - type = vcs_object['type'] - id = vcs_object['id'] - logging.debug('storage %s %s' % (type, id)) - - with db.connect(config['db_url']) as db_conn: - res = service.add_objects(db_conn, config, type, [vcs_object]) - return make_response(map_result_fn(id, res), 204) - - -def _do_lookup(conf, uri_type, id, map_result_fn): - """Looking up type object with sha1. - - config is the configuration needed for the backend to execute query - - vcs_object is the object to look for in the backend - - map_result_fn is a mapping function which takes the backend's result - and transform its output accordingly. - - This function returns an http response of the result. - """ - uri_type_ok = _uri_types.get(uri_type, None) - if not uri_type_ok: - return make_response('Bad request!', 400) - - with db.connect(conf['db_url']) as db_conn: - res = storage.find(db_conn, id, uri_type_ok) - if res: - return write_response(map_result_fn(id, res)) # 200 - return make_response('Not found!', 404) - - -@app.route('/vcs/occurrences/') -def list_occurrences_for(id): - """Return the occurrences pointing to the revision id. - """ - return _do_lookup(app.config['conf'], - 'occurrences', - id, - lambda _, result: list(map(lambda col: col[1], result))) - - -@app.route('/vcs//') -def object_exists_p(uri_type, id): - """Assert if the object with sha1 id, of type uri_type, exists. - """ - return _do_lookup(app.config['conf'], - uri_type, - id, - lambda sha1, _: {'id': sha1}) - - -@app.route('/vcs//', methods=['PUT']) -def put_object(uri_type, id): - """Put an object in storage. - """ - return _do_action_with_payload(app.config['conf'], - add_object, - uri_type, - id, - lambda sha1, _2: sha1) # FIXME: use id or result instead - - -def run(conf): - """Run the api's server. - conf is a dictionary of keywords: - - 'db_url' the db url's access (through psycopg2 format) - - 'content_storage_dir' revisions/directories/contents storage on disk - - 'host' to override the default 127.0.0.1 to open or not the server to - the world - - 'port' to override the default of 5000 (from the underlying layer: - flask) - - 'debug' activate the verbose logs - """ - print("""SWH Api run -host: %s -port: %s -debug: %s""" % (conf['host'], conf.get('port', None), conf['debug'])) - - # app.config is the app's state (accessible) - app.config.update({'conf': conf}) - - app.run(host=conf['host'], - port=conf.get('port', None), - debug=conf['debug'] == 'true') diff --git a/swh/loader/git/client/__init__.py b/swh/loader/git/client/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/git/client/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/client/http.py b/swh/loader/git/client/http.py deleted file mode 100755 index 4bb11fc..0000000 --- a/swh/loader/git/client/http.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import requests - -from retrying import retry - -from swh.loader.git.retry import policy -from swh.loader.git.storage import storage -from swh.loader.git.protocols import serial - - -session_swh = requests.Session() - - -def compute_simple_url(base_url, type): - """Compute the api url. - """ - return '%s%s' % (base_url, type) - - -@retry(retry_on_exception=policy.retry_if_connection_error, - wrap_exception=True, - stop_max_attempt_number=3) -def execute(map_type_url, - method_fn, - base_url, - obj_type, - data, - result_fn=lambda result: result.ok): - """Execute a query to the backend. - - map_type_url is a map of {type: url backend} - - method_fn is swh_session.post or swh_session.put - - base_url is the base url of the backend - - obj_type is the nature of the data - - data is the data to send to the backend - - result_fn is a function which takes the response - result and do something with it. The default function - is to return if the server is ok or not. - """ - if not data: - return data - - res = method_fn(compute_simple_url(base_url, map_type_url[obj_type]), - data=serial.dumps(data), - headers={'Content-Type': serial.MIMETYPE}) - return result_fn(res) - - -# url mapping for lookup -url_lookup_per_type = { storage.Type.origin: "/origins/" - , storage.Type.content: "/vcs/contents/" - , storage.Type.directory: "/vcs/directories/" - , storage.Type.revision: "/vcs/revisions/" - } - - -def post(base_url, obj_type, obj_sha1s): - """Retrieve the objects of type type with sha1 sha1hex. - """ - return execute(url_lookup_per_type, - session_swh.post, - base_url, - obj_type, - obj_sha1s, - result_fn=lambda res: serial.loads(res.content)) - - -# url mapping for storage -url_store_per_type = { storage.Type.origin: "/origins/" - , storage.Type.content: "/vcs/contents/" - , storage.Type.directory: "/vcs/directories/" - , storage.Type.revision: "/vcs/revisions/" - , storage.Type.release: "/vcs/releases/" - , storage.Type.occurrence: "/vcs/occurrences/" - , storage.Type.person: "/vcs/persons/" - } - -def put(base_url, obj_type, obj): - """Given an obj (map, simple object) of obj_type, PUT it in the backend. - """ - return execute(url_store_per_type, session_swh.put, base_url, obj_type, obj) diff --git a/swh/loader/git/conf/__init__.py b/swh/loader/git/conf/__init__.py deleted file mode 100644 index a6b182c..0000000 --- a/swh/loader/git/conf/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -# placeholder diff --git a/swh/loader/git/conf/reader.py b/swh/loader/git/conf/reader.py deleted file mode 100755 index f332883..0000000 --- a/swh/loader/git/conf/reader.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import configparser -import os - - -_map_convert_fn = {'int': int, - 'bool': lambda x: x == 'true'} # conversion per type - - -def read(conf_file, default_conf=None): - """Read the user's configuration file. - Fill in the gap using `default_conf`. -`default_conf` is similar to this: -DEFAULT_CONF = { - 'a': ('string', '/tmp/swh-loader-git/log'), - 'b': ('string', 'dbname=swhloadergit') - 'c': ('bool', true) - 'e': ('bool', None) - 'd': ('int', 10) -} - - """ - config = configparser.ConfigParser(defaults=default_conf) - config.read(os.path.expanduser(conf_file)) - conf = config._sections['main'] - - # remaining missing default configuration key are set - # also type conversion is enforced for underneath layer - for key in default_conf: - nature_type, default_value = default_conf[key] - val = conf.get(key, None) - if not val: # fallback to default value - conf[key] = default_value - else: # value present but in string format, force type conversion - conf[key] = _map_convert_fn.get(nature_type, lambda x: x)(val) - - return conf - - -def prepare_folders(conf, *keys): - """Prepare the folder mentioned in config under keys. - """ - def makedir(folder): - if not os.path.exists(folder): - os.makedirs(folder) - - for key in keys: - makedir(conf[key]) diff --git a/swh/loader/git/data/__init__.py b/swh/loader/git/data/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/git/data/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/data/swhrepo.py b/swh/loader/git/data/swhrepo.py deleted file mode 100644 index df4963d..0000000 --- a/swh/loader/git/data/swhrepo.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -class SWHRepo(): - """Structure with: - - sha1s as list - - map indexed by sha1 - """ - def __init__(self): - self.origin = {} - self.releases = [] - self.occurrences = [] - self.contents = {} - self.directories = {} - self.revisions = {} - self.persons = {} - self.visited = set() - - def add_origin(self, origin): - self.origin = origin - - def get_origin(self): - return self.origin - - def add_release(self, release): - self.releases.append(release) - - def get_releases(self): - return self.releases - - def add_occurrence(self, occurrence): - self.occurrences.append(occurrence) - - def get_occurrences(self): - return self.occurrences - - def add_content(self, content_ref): - sha1 = content_ref['id'] - self.contents[sha1] = content_ref - self.visited.add(sha1) - - def get_contents(self): - return self.contents - - def add_directory(self, directory): - sha1 = directory['id'] - self.directories[sha1] = directory - self.visited.add(sha1) - - def get_directories(self): - return self.directories - - def add_revision(self, revision): - sha1 = revision['id'] - self.revisions[sha1] = revision - self.visited.add(sha1) - - def add_person(self, id, person): - self.persons[id] = person - - def get_persons(self): - return self.persons.values() - - def already_visited(self, sha1): - return sha1 in self.visited - - def get_revisions(self): - return self.revisions diff --git a/swh/loader/git/gitloader/__init__.py b/swh/loader/git/gitloader/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/git/gitloader/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py deleted file mode 100644 index 5578ae7..0000000 --- a/swh/loader/git/loader.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import logging -import os - -from swh.loader.git import git, remote_store, local_store - - -_load_to_back_fn = {'remote': remote_store.load_to_back - ,'local': local_store.prepare_and_load_to_back - } - - -def check_user_conf(conf): - """Check the user's configuration and rejects if problems. - """ - action = conf['action'] - if action != 'load': - return 'skip unknown action %s' % action - - backend_type = conf['backend-type'] - if backend_type not in _load_to_back_fn: - return 'skip unknown backend-type %s (only `remote`, `local` supported)' % backend_type - - repo_path = conf['repo_path'] - if not os.path.exists(repo_path): - return 'Repository %s does not exist.' % repo_path - - return None - - -def load(conf): - """According to action, load the repo_path. - - used configuration keys: - - action: requested action - - repo_path: git repository path ('load' action only) - - backend-type: backend access's type (remote or local) - - backend: url access to backend api - """ - error_msg = check_user_conf(conf) - if error_msg: - logging.error(error_msg) - raise Exception(error_msg) - - repo_path = conf['repo_path'] - logging.info('load repo_path %s' % repo_path) - - swhrepo = git.parse(repo_path) - _load_to_back_fn[conf['backend-type']](conf['backend'], swhrepo) diff --git a/swh/loader/git/local_store.py b/swh/loader/git/local_store.py deleted file mode 100644 index eac68bb..0000000 --- a/swh/loader/git/local_store.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from swh.loader.git.storage import storage, db, service -from swh.loader.git.conf import reader -from swh.storage.objstorage import ObjStorage - - -# FIXME: duplicated from bin/swh-backend... -# Default configuration file -DEFAULT_CONF_FILE = '~/.config/swh/back.ini' - - -# default configuration -DEFAULT_CONF = { - 'content_storage_dir': ('string', '/tmp/swh-loader-git/content-storage'), - 'log_dir': ('string', '/tmp/swh-loader-git/log'), - 'db_url': ('string', 'dbname=softwareheritage-dev'), - 'folder_depth': ('int', 4), - 'debug': ('bool', None), - 'host': ('string', '127.0.0.1'), - 'port': ('int', 5000) -} - - -def store_only_new(db_conn, conf, obj_type, obj): - """Store object if not already present. - """ - if not storage.find(db_conn, obj['id'], obj_type): - storage.add(db_conn, conf, obj) - - -_obj_to_persist_fn = {storage.Type.revision: service.add_revisions} - - -def store_unknown_objects(db_conn, conf, obj_type, swhmap): - """Load objects to the backend. - """ - sha1s = swhmap.keys() - - # have: filter unknown obj - unknown_obj_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) - if not unknown_obj_sha1s: - return True - - # seen: now store in backend - persist_fn = _obj_to_persist_fn.get(obj_type, service.add_objects) - obj_fulls = map(swhmap.get, unknown_obj_sha1s) - return persist_fn(db_conn, conf, obj_type, obj_fulls) - - -def load_to_back(conf, swh_repo): - """Load to the backend the repository swh_repo. - """ - with db.connect(conf['db_url']) as db_conn: - # First, store/retrieve the origin identifier - # FIXME: should be done by the cloner worker (which is not yet plugged - # on the right swh db ftm) - service.add_origin(db_conn, swh_repo.get_origin()) - - # First reference all unknown persons - service.add_persons(db_conn, conf, storage.Type.person, - swh_repo.get_persons()) - - res = store_unknown_objects(db_conn, conf, storage.Type.content, - swh_repo.get_contents()) - if res: - res = store_unknown_objects(db_conn, conf, storage.Type.directory, - swh_repo.get_directories()) - if res: - res = store_unknown_objects(db_conn, conf, storage.Type.revision, - swh_repo.get_revisions()) - if res: - # brutally send all remaining occurrences - service.add_objects(db_conn, conf, storage.Type.occurrence, - swh_repo.get_occurrences()) - - # and releases (the idea here is that compared to existing - # objects, the quantity is less) - service.add_objects(db_conn, conf, storage.Type.release, - swh_repo.get_releases()) - - -def prepare_and_load_to_back(backend_setup_file, swh_repo): - # Read the configuration file (no check yet) - conf = reader.read(backend_setup_file or DEFAULT_CONF_FILE, DEFAULT_CONF) - reader.prepare_folders(conf, 'content_storage_dir') - conf.update({ - 'objstorage': ObjStorage(conf['content_storage_dir'], - conf['folder_depth']) - }) - - load_to_back(conf, swh_repo) diff --git a/swh/loader/git/manager.py b/swh/loader/git/manager.py deleted file mode 100755 index 8df0a98..0000000 --- a/swh/loader/git/manager.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import logging - -from swh.loader.git.storage import db, models - - -def manage(action, db_url): - """According to action, load the repository. - - used configuration keys: - - action: requested action [cleandb|initdb] - """ - with db.connect(db_url) as db_conn: - if action == 'cleandb': - logging.info('clean database') - models.cleandb(db_conn) - elif action == 'initdb': - logging.info('initialize database') - models.initdb(db_conn) - else: - logging.warn('skip unknown-action %s' % action) diff --git a/swh/loader/git/protocols/__init__.py b/swh/loader/git/protocols/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/git/protocols/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/protocols/serial.py b/swh/loader/git/protocols/serial.py deleted file mode 100755 index f5755f1..0000000 --- a/swh/loader/git/protocols/serial.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file_or_handle at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file_or_handle for more information - -import pickle -from io import BytesIO - -MIMETYPE="application/octet-stream" - - -def load(file_or_handle): - """Read a pickled object from the opened file_or_handle object. - """ - return pickle.load(file_or_handle) - - -def loads(obj): - """Read a pickled object from bytes object. - """ - if obj == b'': - return obj - return pickle.loads(obj) - - -def dumps(obj): - """Return the pickle representation of the obj. - """ - return pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL) - - -def dumps_as_stream(obj): - """Return the pickle representation of the obj as stream. - """ - return pickle.dump(obj, BytesIO(), protocol=pickle.HIGHEST_PROTOCOL) diff --git a/swh/loader/git/remote_store.py b/swh/loader/git/remote_store.py deleted file mode 100644 index fbbc6c2..0000000 --- a/swh/loader/git/remote_store.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from swh.loader.git.storage import storage -from swh.loader.git.client import http - - -def store_unknown_objects(back_url, obj_type, swhmap): - """Load objects to the backend. - """ - sha1s = list(swhmap.keys()) - # have: filter unknown obj - unknown_obj_sha1s = http.post(back_url, obj_type, sha1s) - if not unknown_obj_sha1s: - return True - - # store unknown objects - return http.put(back_url, obj_type, map(swhmap.get, unknown_obj_sha1s)) - - -def load_to_back(back_url, swh_repo): - """Load to the back_url the repository swh_repo. - """ - # First, store/retrieve the origin identifier - # FIXME: should be done by the cloner worker (which is not yet plugged on - # the right swh db ftm) - http.put(back_url, - obj_type=storage.Type.origin, - obj=swh_repo.get_origin()) - - http.put(back_url, - obj_type=storage.Type.person, - obj=list(swh_repo.get_persons())) - - # let the backend and api discuss what's really needed - # - first this worker sends the checksums - # - then the backend answers the checksums it does not know - # - then the worker sends only what the backend does not know per - # object type basis - res = store_unknown_objects(back_url, storage.Type.content, - swh_repo.get_contents()) - - if res: - res = store_unknown_objects(back_url, storage.Type.directory, - swh_repo.get_directories()) - if res: - res = store_unknown_objects(back_url, storage.Type.revision, - swh_repo.get_revisions()) - if res: - # brutally send all remaining occurrences - http.put(back_url, - storage.Type.occurrence, - swh_repo.get_occurrences()) - - # and releases (the idea here is that compared to existing - # other objects, the quantity is less) - http.put(back_url, - storage.Type.release, - swh_repo.get_releases()) - - # FIXME: deal with collision failures which should be raised by backend. diff --git a/swh/loader/git/retry/__init__.py b/swh/loader/git/retry/__init__.py deleted file mode 100644 index fdffa2a..0000000 --- a/swh/loader/git/retry/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# placeholder diff --git a/swh/loader/git/retry/policy.py b/swh/loader/git/retry/policy.py deleted file mode 100644 index 2fca5d8..0000000 --- a/swh/loader/git/retry/policy.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from requests import ConnectionError - - -def retry_if_io_error(exc): - """Return True if IOError, - False otherwise. - """ - return isinstance(exc, IOError) - - -def retry_if_connection_error(exc): - """Return True if ConnectionError, - False otherwise. - """ - return isinstance(exc, ConnectionError) diff --git a/swh/loader/git/storage/__init__.py b/swh/loader/git/storage/__init__.py deleted file mode 100644 index a6b182c..0000000 --- a/swh/loader/git/storage/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -# placeholder diff --git a/swh/loader/git/storage/db.py b/swh/loader/git/storage/db.py deleted file mode 100644 index 7f603aa..0000000 --- a/swh/loader/git/storage/db.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import psycopg2 - - -connect = psycopg2.connect - - -def execute(cur, query_params, trace=None): - """Execute the query_params. - query_params is expected to be either: - - a sql query (string) - - a tuple (sql query, params) - """ - if trace: - print("query: ", cur.mogrify(*query_params).decode()) - if isinstance(query_params, str): - cur.execute(query_params) - else: - cur.execute(*query_params) - - -def copy_from(cur, file, table): - """Copy the content of a file to the db in the table table. - """ - cur.copy_from(file, table) - - -def insert(db_conn, query_params, trace=None): - """Execute an insertion and returns the identifier. - Expect an insert query with the right returning clause. - No check is done. - """ - with db_conn.cursor() as cur: - execute(cur, query_params, trace) - result = cur.fetchone() - return result[0] - - -def query_execute(db_conn, query_params, trace=None): - """Execute one query. - Type of sql queries: insert, delete, drop, create... - query_params is expected to be either: - - a sql query (string) - - a tuple (sql query, params) - """ - with db_conn.cursor() as cur: - return execute(cur, query_params, trace) - - -def queries_execute(db_conn, queries_params, trace=None): - """Execute multiple queries without any result expected. - Type of sql queries: insert, delete, drop, create... - query_params is expected to be a list of mixed: - - sql query (string) - - tuple (sql query, params) - """ - with db_conn.cursor() as cur: - for query_params in queries_params: - execute(cur, query_params, trace) - - -def query_fetchone(db_conn, query_params, trace=None): - """Execute sql query which returns one result. - query_params is expected to be either: - - a sql query (string) - - a tuple (sql query, params) - """ - with db_conn.cursor() as cur: - return fetchone(cur, query_params, trace) - - -def fetchone(cur, query_params, trace=None): - """Execute sql query and returns one result. - """ - execute(cur, query_params, trace) - return cur.fetchone() - - -def query_fetch(db_conn, query_params, trace=None): - """Execute sql query which returns results. - query_params is expected to be either: - - a sql query (string) - - a tuple (sql query, params) - """ - with db_conn.cursor() as cur: - execute(cur, query_params, trace) - return cur.fetchall() diff --git a/swh/loader/git/storage/models.py b/swh/loader/git/storage/models.py deleted file mode 100644 index 8de27bc..0000000 --- a/swh/loader/git/storage/models.py +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from enum import Enum - -from . import db - - -class Type(Enum): - """Types of git objects. - """ - occurrence = 'occurrence' # ~git branch - release = 'release' # ~git annotated tag - revision = 'revision' # ~git commit - directory = 'directory' # ~git tree - directory_entry = 'directory_entry' # ~git tree_entry - content = 'content' # ~git blob - origin = 'origin' - person = 'person' # committer, tagger, author - - -def initdb(db_conn): - """For retrocompatibility. - """ - pass - - -def cleandb(db_conn): - db.queries_execute(db_conn, ['TRUNCATE TABLE release CASCADE', - 'TRUNCATE TABLE revision CASCADE', - 'TRUNCATE TABLE revision_history CASCADE', - 'TRUNCATE TABLE directory CASCADE', - 'TRUNCATE TABLE directory_entry CASCADE', - 'TRUNCATE TABLE content CASCADE', - 'TRUNCATE TABLE occurrence_history CASCADE', - 'TRUNCATE TABLE occurrence CASCADE', - 'TRUNCATE TABLE origin CASCADE', - 'TRUNCATE TABLE person CASCADE', - ]) - - -def add_origin(db_conn, url, type, parent=None): - """Insert origin and returns the newly inserted id. - """ - return db.insert(db_conn, - ("""INSERT INTO origin (type, url, parent_id) - VALUES (%s, %s, %s) - RETURNING id""", - (type, url, parent))) - -def add_person(db_conn, name, email): - """Insert author and returns the newly inserted id. - """ - return db.insert(db_conn, - ("""INSERT INTO person (name, email) - VALUES (%s, %s) - RETURNING id""", - (name, email))) - - -def add_content(db_conn, sha1, sha1_content, sha256_content, size): - """Insert a new content. - """ - db.query_execute(db_conn, - ("""INSERT INTO content (id, sha1, sha256, length) - VALUES (%s, %s, %s, %s)""", - (sha1, sha1_content, sha256_content, size))) - - -def add_directory(db_conn, obj_sha): - """Insert a new directory. - """ - db.query_execute(db_conn, - ("""INSERT INTO directory (id) - VALUES (%s)""", - (obj_sha,))) - - -def add_directory_entry(db_conn, name, sha, type, perms, - atime, mtime, ctime, parent): - """Insert a new directory. - """ - db.query_execute(db_conn, - ("""INSERT INTO directory_entry - (name, id, type, perms, atime, mtime, ctime, - directory) - VALUES (%s, %s, %s, %s, %s, %s, %s, - %s)""", - (name, sha, type, perms, atime, mtime, ctime, - parent))) - - -def add_revision(db_conn, sha, date, directory, message, author, committer, - parent_shas=None): - """Insert a revision. - """ - db.query_execute(db_conn, - ("""INSERT INTO revision - (id, date, directory, message, author, committer) - VALUES (%s, %s, %s, %s, - (select id from person where name=%s and email=%s), - (select id from person where name=%s and email=%s))""", - (sha, date, directory, message, - author['name'], author['email'], - committer['name'], committer['email']))) - - -def add_revision_history(db_conn, couple_parents): - """Store the revision history graph. - """ - tuples = ','.join(["('%s','%s')" % couple for couple in couple_parents]) - query = 'INSERT INTO revision_history (id, parent_id) VALUES ' + tuples - db.query_execute(db_conn, query) - - -def add_release(db_conn, obj_sha, revision, date, name, comment, author): - """Insert a release. - """ - db.query_execute(db_conn, - ("""INSERT INTO release (id, revision, date, name, comment, author) - VALUES (%s, %s, %s, %s, %s, - (select id from person where name=%s and email=%s))""", - (obj_sha, revision, date, name, comment, author['name'], author['email']))) - - -def add_occurrence(db_conn, url_origin, reference, revision): - """Insert an occurrence. - Check if occurrence history already present. - If present do nothing, otherwise insert - """ - with db_conn.cursor() as cur: - occ = find_occurrence(cur, reference, revision, url_origin) - if not occ: - db.execute( - cur, - ("""INSERT INTO occurrence - (origin, reference, revision) - VALUES ((select id from origin where url=%s), %s, %s)""", - (url_origin, reference, revision))) - - -def find_revision(db_conn, obj_sha): - """Find a revision by its obj_sha. - """ - return find_object(db_conn, obj_sha, Type.revision) - - -def find_directory(db_conn, obj_sha): - """Find a directory by its obj_sha. - """ - return find_object(db_conn, obj_sha, Type.directory) - - -def find_content(db_conn, obj_sha): - """Find a content by its obj_sha. - """ - return find_object(db_conn, obj_sha, Type.content) - - -def find_occurrences_for_revision(db_conn, revision, type): - """Find all occurences for a specific revisions. - type is not used (implementation detail). - """ - return db.query_fetch(db_conn, ("""SELECT * - FROM occurrence - WHERE revision=%s""", - (revision,))) - - -def find_origin(db_conn, origin_url, origin_type): - """Find all origins matching an url and an origin type. - """ - return db.query_fetchone(db_conn, ("""SELECT * - FROM origin - WHERE url=%s - AND type=%s""", - (origin_url, origin_type))) - -def find_person(db_conn, email, name): - """Find a person uniquely identified by email and name. - """ - return db.query_fetchone(db_conn, ("""SELECT id - FROM person - WHERE email=%s - AND name=%s""", - (email, name))) - - -def find_occurrence(cur, reference, revision, url_origin): - """Find an ocurrence with reference pointing on valid revision for date. - """ - return db.fetchone( - cur, - ("""SELECT * - FROM occurrence oc - WHERE reference=%s - AND revision=%s - AND origin = (select id from origin where url = %s)""", - (reference, revision, url_origin))) - - -def find_object(db_conn, obj_sha, obj_type): - """Find an object of obj_type by its obj_sha. - """ - table = obj_type if isinstance(obj_type, str) else obj_type.value - query = 'select id from ' + table + ' where id=%s' - return db.query_fetchone(db_conn, (query, (obj_sha,))) - - -def filter_unknown_objects(db_conn, file_sha1s, table_to_filter, tbl_tmp_name): - """Given a list of sha1s, filter the unknown object between this list and - the content of the table table_to_filter. - tbl_tmp_name is the temporary table used to filter. - """ - with db_conn.cursor() as cur: - # explicit is better than implicit - # simply creating the temporary table seems to be enough - db.execute(cur, """CREATE TEMPORARY TABLE IF NOT EXISTS %s( - id git_object_id) - ON COMMIT DELETE ROWS;""" % tbl_tmp_name) - db.copy_from(cur, file_sha1s, tbl_tmp_name) - db.execute(cur, '(SELECT id FROM %s) EXCEPT (SELECT id FROM %s);' % - (tbl_tmp_name, table_to_filter)) - return cur.fetchall() - - -def find_unknown_revisions(db_conn, file_sha1s): - """Filter unknown revisions from file_sha1s. - """ - return filter_unknown_objects(db_conn, file_sha1s, 'revision', - 'filter_sha1_revision') - - -def find_unknown_directories(db_conn, file_sha1s): - """Filter unknown directories from file_sha1s. - """ - return filter_unknown_objects(db_conn, file_sha1s, 'directory', - 'filter_sha1_directory') - - -def find_unknown_contents(db_conn, file_sha1s): - """Filter unknown contents from file_sha1s. - """ - return filter_unknown_objects(db_conn, file_sha1s, 'content', - 'filter_sha1_content') - - -def _count_objects(db_conn, type): - return db.query_fetchone(db_conn, 'SELECT count(*) FROM ' + type.value)[0] - - -def count_revisions(db_conn): - """Count the number of revisions. - """ - return _count_objects(db_conn, Type.revision) - - -def count_directories(db_conn): - """Count the number of directories. - """ - return _count_objects(db_conn, Type.directory) - - -def count_contents(db_conn): - """Count the number of contents. - """ - return _count_objects(db_conn, Type.content) - - -def count_occurrence(db_conn): - """Count the number of occurrence. - """ - return _count_objects(db_conn, Type.occurrence) - - -def count_release(db_conn): - """Count the number of occurrence. - """ - return _count_objects(db_conn, Type.release) - - -def count_person(db_conn): - """Count the number of occurrence. - """ - return _count_objects(db_conn, Type.person) diff --git a/swh/loader/git/storage/service.py b/swh/loader/git/storage/service.py deleted file mode 100644 index 7922fef..0000000 --- a/swh/loader/git/storage/service.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from . import storage - - -filter_unknowns_type = storage.find_unknowns - - -def find_origin(db_conn, origin): - """Find origin. - """ - origin_found = storage.find_origin(db_conn, origin) - return None if not origin_found else {'id': origin_found[0]} - - -def find_person(db_conn, person): - """Find person. - """ - person_found = storage.find_person(db_conn, person) - return None if not person_found else {'id': person_found[0]} - - -def add_origin(db_conn, origin): - """Add origin if not already existing. - """ - origin_found = storage.find_origin(db_conn, origin) - id = origin_found[0] if origin_found else storage.add_origin(db_conn, origin) - return {'id': id} - - -def add_revisions(db_conn, conf, obj_type, objs): - """Add Revisions. - """ - couple_parents = [] - for obj in objs: # iterate over objects of type uri_type - obj_id = obj['id'] - obj_found = storage.find(db_conn, obj_id, obj_type) - if not obj_found: - storage.add(db_conn, conf, obj_id, obj_type, obj) - - # deal with revision history - par_shas = obj.get('parent-sha1s', None) - if par_shas: - couple_parents.extend([(obj_id, p) for p in par_shas]) - - storage.add_revision_history(db_conn, couple_parents) - - return True - - -def add_persons(db_conn, conf, obj_type, objs): - """Add persons. - conf, obj_type are not used (implementation detail.) - """ - for obj in objs: - obj_found = storage.find_person(db_conn, obj) - if not obj_found: - storage.add_person(db_conn, obj) - - return True - - -# dispatch map to add in storage with fs or not -_add_fn = {storage.Type.content: storage.add_with_fs_storage} - - -def add_objects(db_conn, conf, obj_type, objs): - """Add objects if not already present in the storage. - """ - add_fn = _add_fn.get(obj_type, storage.add) - res = [] - for obj in objs: # iterate over objects of type uri_type - obj_id = obj['id'] - obj_found = storage.find(db_conn, obj_id, obj_type) - if not obj_found: - obj = add_fn(db_conn, conf, obj_id, obj_type, obj) - res.append(obj) - else: - res.append(obj_found) - - return res - - -_persist_fn = {storage.Type.person: add_persons, - storage.Type.revision: add_revisions} - - -def persist(db_conn, conf, obj_type, objs): - """Generic call to persist persons, revisions or other objects. - - """ - persist_fn = _persist_fn.get(obj_type, add_objects) - return persist_fn(db_conn, conf, obj_type, objs) diff --git a/swh/loader/git/storage/storage.py b/swh/loader/git/storage/storage.py deleted file mode 100755 index b9638e5..0000000 --- a/swh/loader/git/storage/storage.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from io import StringIO - -from . import models - - -Type = models.Type - - -_find_object = {Type.occurrence: models.find_occurrences_for_revision} - - -def find(db_conn, id, type): - """Find an object according to its sha1hex and type. - """ - find_fn = _find_object.get(type, models.find_object) - return find_fn(db_conn, id, type) - - -_find_unknown = {Type.revision: models.find_unknown_revisions, - Type.content: models.find_unknown_contents, - Type.directory: models.find_unknown_directories} - - -def find_unknowns(db_conn, obj_type, sha1s_hex): - """Given a list of sha1s, return the non presents one in storage. - """ - def row_to_sha1(row): - """Convert a row (memoryview) to a string sha1. - """ - return row[0] - - vals = '\n'.join(sha1s_hex) - cpy_data_buffer = StringIO() - cpy_data_buffer.write(vals) - cpy_data_buffer.seek(0) # move file cursor back at start of file - - find_unknown_fn = _find_unknown[obj_type] - unknowns = find_unknown_fn(db_conn, cpy_data_buffer) - cpy_data_buffer.close() - return list(map(row_to_sha1, unknowns)) - - -def _add_content(db_conn, vcs_object, sha1hex): - """Add a blob to storage. - Designed to be wrapped in a db transaction. - Returns: - - the sha1 if everything went alright. - - None if something went wrong - Writing exceptions can also be raised and expected to be handled by the - caller. - """ - models.add_content(db_conn, - sha1hex, - vcs_object['content-sha1'], - vcs_object['content-sha256'], - vcs_object['size']) - return sha1hex - - -def _add_directory(db_conn, vcs_object, sha1hex): - """Add a directory to storage. - Designed to be wrapped in a db transaction. - """ - models.add_directory(db_conn, sha1hex) - for directory_entry in vcs_object['entries']: - _add_directory_entry(db_conn, directory_entry) - return sha1hex - - -def _add_directory_entry(db_conn, vcs_object): - """Add a directory to storage. - Designed to be wrapped in a db transaction. - Returns: - - the sha1 if everything went alright. - - None if something went wrong - Writing exceptions can also be raised and expected to be handled by the - caller. - """ - name = vcs_object['name'] - parent = vcs_object['parent'] - models.add_directory_entry(db_conn, - name, - vcs_object['target-sha1'], - vcs_object['nature'], - vcs_object['perms'], - vcs_object['atime'], - vcs_object['mtime'], - vcs_object['ctime'], - parent) - return name, parent - - -def _add_revision(db_conn, vcs_object, sha1hex): - """Add a revision to storage. - Designed to be wrapped in a db transaction. - Returns: - - the sha1 if everything went alright. - - None if something went wrong - Writing exceptions can also be raised and expected to be handled by the - caller. - """ - models.add_revision(db_conn, - sha1hex, - vcs_object['date'], - vcs_object['directory'], - vcs_object['message'], - vcs_object['author'], - vcs_object['committer'], - vcs_object['parent-sha1s']) - return sha1hex - - -def _add_release(db_conn, vcs_object, sha1hex): - """Add a release. - """ - models.add_release(db_conn, - sha1hex, - vcs_object['revision'], - vcs_object['date'], - vcs_object['name'], - vcs_object['comment'], - vcs_object['author']) - return sha1hex - - -def _add_occurrence(db_conn, vcs_object, sha1hex): - """Add an occurrence. - """ - models.add_occurrence(db_conn, - vcs_object['url-origin'], - vcs_object['reference'], - vcs_object['revision']) - return sha1hex - - -def add_person(db_conn, vcs_object): - """Add an author. - """ - return models.add_person(db_conn, - vcs_object['name'], - vcs_object['email']) - - -_store_fn = {Type.directory: _add_directory, - Type.revision: _add_revision, - Type.release: _add_release, - Type.occurrence: _add_occurrence} - - -def add_origin(db_conn, origin): - """A a new origin and returns its id. - """ - return models.add_origin(db_conn, origin['url'], origin['type']) - - -def find_origin(db_conn, origin): - """Find an existing origin. - """ - return models.find_origin(db_conn, origin['url'], origin['type']) - - -def find_person(db_conn, person): - """Find an existing person. - """ - return models.find_person(db_conn, person['email'], person['name']) - - -def add_with_fs_storage(db_conn, config, id, type, vcs_object): - """Add vcs_object in the storage - - db_conn is the opened connection to the db - - config is the map of configuration needed for core layer - - type is not used here but represent the type of vcs_object - - vcs_object is the object meant to be persisted in fs and db - """ - config['objstorage'].add_bytes(vcs_object['content'], id) # FIXME use this id - return _add_content(db_conn, vcs_object, id) - - -def add(db_conn, config, id, type, vcs_object): - """Given a sha1hex, type and content, store a given object in the store. - - db_conn is the opened connection to the db - - config is not used here - - type is the object's type - - vcs_object is the object meant to be persisted in db - """ - return _store_fn[type](db_conn, vcs_object, id) - - -def add_revision_history(db_conn, couple_parents): - """Given a list of tuple (sha, parent_sha), store in revision_history. - """ - if len(couple_parents) > 0: - models.add_revision_history(db_conn, couple_parents) diff --git a/swh/loader/git/tests/test_api_content.py b/swh/loader/git/tests/test_api_content.py deleted file mode 100644 index 3c8eb69..0000000 --- a/swh/loader/git/tests/test_api_content.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import app_client, app_client_teardown - - -@attr('slow') -class ContentTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, self.content_storage_dir = app_client() - - with db.connect(db_url) as db_conn: - self.content_sha1_id = '222222f9dd5dc46ee476a8be155ab049994f717e' - content_sha1_id = 'blabliblablo' - self.content_sha256_hex = '222222f9dd5dc46ee476a8be155ab049994f717e' - models.add_content(db_conn, - self.content_sha1_id, - content_sha1_id, - self.content_sha256_hex, - 10) - - def tearDown(self): - app_client_teardown(self.content_storage_dir) - - @istest - def get_content_ok(self): - # when - rv = self.app.get('/vcs/contents/%s' % self.content_sha1_id) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == '222222f9dd5dc46ee476a8be155ab049994f717e' - - @istest - def get_content_not_found(self): - # when - rv = self.app.get('/vcs/contents/222222f9dd5dc46ee476a8be155ab049994f7170') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def get_content_not_found_with_bad_format(self): - # when - rv = self.app.get('/vcs/contents/1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def put_content_create_and_update(self): - content_sha1 = 'sha1-contentc46ee476a8be155ab03333333333' - - # does not exist - rv = self.app.get('/vcs/contents/%s' % content_sha1) - - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - # we create it - body = {'id': content_sha1, - 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', - 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', - 'content': b'bar', - 'size': '3'} - - rv = self.app.put('/vcs/contents/%s' % content_sha1, - data=serial.dumps(body), - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # now it exists - rv = self.app.get('/vcs/contents/%s' % content_sha1) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' - - # # we update it - body = {'id': content_sha1, - 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', - 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', - 'content': b'bar', - 'size': '3'} - - rv = self.app.put('/vcs/contents/%s' % content_sha1, - data=serial.dumps(body), - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # still the same - rv = self.app.get('/vcs/contents/%s' % content_sha1) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' diff --git a/swh/loader/git/tests/test_api_directory.py b/swh/loader/git/tests/test_api_directory.py deleted file mode 100644 index 3646e3a..0000000 --- a/swh/loader/git/tests/test_api_directory.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import now, app_client, app_client_teardown - - -@attr('slow') -class DirectoryTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, self.content_storage_dir = app_client() - - with db.connect(db_url) as db_conn: - self.content_sha1_id = 'content-sha1c46ee476a8be155ab049994f717e' - content_sha1_hex = 'content-sha1c46ee476a8be155ab049994f717e' - content_sha256_hex = 'content-sha2566ee476a8be155ab049994f717e' - models.add_content(db_conn, - self.content_sha1_id, - content_sha1_hex, - content_sha256_hex, - 10) - - self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' - models.add_directory(db_conn, self.directory_sha1_hex) - - def tearDown(self): - app_client_teardown(self.content_storage_dir) - - @istest - def get_directory_ok(self): - # when - rv = self.app.get('/vcs/directories/%s' % self.directory_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f717e' - - @istest - def get_directory_not_found(self): - # when - rv = self.app.get('/vcs/directories/111111f9dd5dc46ee476a8be155ab049994f7170') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def get_directory_not_found_with_bad_format(self): - # when - rv = self.app.get('/vcs/directories/1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def put_directory_create_and_update(self): - directory_sha1='directory-sha16ee476a8be155ab049994f7170' - - # does not exist - rv = self.app.get('/vcs/directories/%s' % directory_sha1) - - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - # we create it - body = serial.dumps({'entries': [{'name': 'filename', - 'target-sha1': self.content_sha1_id, - 'nature': 'file', - 'perms': '000', - 'atime': now(), - 'mtime': now(), - 'ctime': now(), - 'parent': directory_sha1}, - {'name': 'dirname', - 'target-sha1': self.directory_sha1_hex, - 'nature': 'directory', - 'perms': '012', - 'atime': now(), - 'mtime': now(), - 'ctime': now(), - 'parent': directory_sha1} - ]}) - - rv = self.app.put('/vcs/directories/%s' % directory_sha1, - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # now it exists - rv = self.app.get('/vcs/directories/%s' % directory_sha1) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' - - # we update it - rv = self.app.put('/vcs/directories/directory-sha16ee476a8be155ab049994f7170', - data=serial.dumps({'entry': 'directory-bar'}), - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # still the same - rv = self.app.get('/vcs/directories/directory-sha16ee476a8be155ab049994f7170') - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' diff --git a/swh/loader/git/tests/test_api_home.py b/swh/loader/git/tests/test_api_home.py deleted file mode 100644 index 3352151..0000000 --- a/swh/loader/git/tests/test_api_home.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from test_utils import app_client - - -@attr('slow') -class HomeTestCase(unittest.TestCase): - def setUp(self): - self.app, _, _ = app_client() - - @istest - def get_slash(self): - # when - rv = self.app.get('/') - - # then - assert rv.status_code == 200 - assert rv.data == b'Dev SWH API' - - @istest - def get_404(self): - # when - rv = self.app.get('/nowhere') - - # then - assert rv.status_code == 404 - - @istest - def get_bad_request(self): - # when - rv = self.app.get('/vcs/not-a-good-type/1') - - # then - assert rv.status_code == 400 - assert rv.data == b'Bad request!' diff --git a/swh/loader/git/tests/test_api_occurrence.py b/swh/loader/git/tests/test_api_occurrence.py deleted file mode 100644 index 95c3e8f..0000000 --- a/swh/loader/git/tests/test_api_occurrence.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import now, app_client, app_client_teardown - - -@attr('slow') -class OccurrenceTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, self.content_storage_dir = app_client() - - with db.connect(db_url) as db_conn: - self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' - models.add_directory(db_conn, self.directory_sha1_hex) - - authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} - models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) - - self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' - models.add_revision(db_conn, - self.revision_sha1_hex, - now(), - self.directory_sha1_hex, - "revision message", - authorAndCommitter, - authorAndCommitter) - - self.origin_url = "https://github.com/user/repo" - models.add_origin(db_conn, self.origin_url, 'git') - - self.reference_name = 'master' - models.add_occurrence(db_conn, - self.origin_url, - self.reference_name, - self.revision_sha1_hex) - - self.reference_name2 = 'master2' - models.add_occurrence(db_conn, - self.origin_url, - self.reference_name2, - self.revision_sha1_hex) - - self.revision_sha1_hex_2 = '2-revision-sha1-to-test-existence9994f71' - models.add_revision(db_conn, - self.revision_sha1_hex_2, - now(), - self.directory_sha1_hex, - "revision message 2", - authorAndCommitter, - authorAndCommitter) - - def tearDown(self): - app_client_teardown(self.content_storage_dir) - - @istest - def get_occurrence_ok(self): - # when - rv = self.app.get('/vcs/occurrences/%s' % self.revision_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data) == [self.reference_name, self.reference_name2] - - @istest - def get_occurrence_not_found(self): - # when - rv = self.app.get('/vcs/occurrences/inexistant-sha1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def get_occurrence_not_found_with_bad_format(self): - # when - rv = self.app.get('/vcs/occurrences/1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def put_occurrence_create_and_update(self): - occ_revision_sha1_hex = self.revision_sha1_hex_2 - - rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) - - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - # we create it - body = serial.dumps({'revision': occ_revision_sha1_hex, # FIXME: redundant with the one from uri.. - 'reference': 'master', - 'url-origin': self.origin_url}) - - rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, # ... here - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # now it exists - rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data) == ['master'] - - # we update it - rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # still the same - rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data) == ['master'] diff --git a/swh/loader/git/tests/test_api_origin.py b/swh/loader/git/tests/test_api_origin.py deleted file mode 100644 index 3792139..0000000 --- a/swh/loader/git/tests/test_api_origin.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import app_client - - -@attr('slow') -class OriginTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, _ = app_client() - - with db.connect(db_url) as db_conn: - self.origin_url = 'https://github.com/torvalds/linux.git' - self.origin_type = 'git' - self.origin_id = models.add_origin(db_conn, self.origin_url, self.origin_type) - - @istest - def get_origin_ok(self): - # when - payload = {'url': self.origin_url, - 'type': self.origin_type} - rv = self.app.post('/origins/', - data=serial.dumps(payload), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == self.origin_id - - @istest - def get_origin_not_found(self): - # when - payload = {'url': 'unknown', - 'type': 'blah'} - rv = self.app.post('/origins/', - data=serial.dumps(payload), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 404 - assert rv.data == b'Origin not found!' - - @istest - def get_origin_not_found_with_bad_format(self): - # when - rv = self.app.post('/origins/', - data=serial.dumps({'url': 'unknown'}), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 400 - - @istest - def put_origin(self): - # when - payload = {'url': 'unknown', - 'type': 'blah'} - rv = self.app.post('/origins/', - data=serial.dumps(payload), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 404 - assert rv.data == b'Origin not found!' - - # when - rv = self.app.put('/origins/', - data=serial.dumps(payload), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 # FIXME: 201 - assert serial.loads(rv.data)['id'] - - payload = {'url': 'unknown', - 'type': 'blah'} - rv = self.app.post('/origins/', - data=serial.dumps(payload), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 200 - origin_id = serial.loads(rv.data)['id'] - assert origin_id - - # when - rv = self.app.put('/origins/', - data=serial.dumps(payload), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 # FIXME: 204 - assert serial.loads(rv.data)['id'] == origin_id diff --git a/swh/loader/git/tests/test_api_person.py b/swh/loader/git/tests/test_api_person.py deleted file mode 100644 index 8c52f17..0000000 --- a/swh/loader/git/tests/test_api_person.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import app_client - - -@attr('slow') -class PersonTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, _ = app_client() - - with db.connect(db_url) as db_conn: - self.person_name = 'some-name' - self.person_email = 'some@mail.git' - self.person_id = models.add_person(db_conn, self.person_name, self.person_email) - - @istest - def get_person_ok(self): - # when - person = {'name': self.person_name, - 'email': self.person_email} - rv = self.app.post('/vcs/persons/', - data=serial.dumps(person), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == self.person_id - - @istest - def get_person_not_found(self): - # when - person = {'name': 'unknown', - 'email': 'blah'} - rv = self.app.post('/vcs/persons/', - data=serial.dumps(person), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 404 - assert rv.data == b'Person not found!' - - @istest - def get_person_not_found_with_bad_format(self): - # when - rv = self.app.post('/vcs/persons/', - data=serial.dumps({'name': 'unknown'}), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 400 - - @istest - def put_person(self): - # when - person = {'name': 'unknown', - 'email': 'blah'} - rv = self.app.post('/vcs/persons/', - data=serial.dumps(person), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 404 - assert rv.data == b'Person not found!' - - # when - rv = self.app.put('/vcs/persons/', - data=serial.dumps([person]), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 204 - assert rv.data == b'' - - person = {'name': 'unknown', - 'email': 'blah'} - rv = self.app.post('/vcs/persons/', - data=serial.dumps(person), - headers={'Content-Type': serial.MIMETYPE}) - # then - assert rv.status_code == 200 - person_id = serial.loads(rv.data)['id'] - assert person_id - - # when - rv = self.app.put('/vcs/persons/', - data=serial.dumps([person, person]), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 204 - assert rv.data == b'' diff --git a/swh/loader/git/tests/test_api_post_per_type.py b/swh/loader/git/tests/test_api_post_per_type.py deleted file mode 100644 index 87b4f3a..0000000 --- a/swh/loader/git/tests/test_api_post_per_type.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import now, app_client, app_client_teardown - - -@attr('slow') -class TestPostObjectsPerTypeCase(unittest.TestCase): - def setUp(self): - self.app, self.db_url, self.content_storage_dir = app_client() - - with db.connect(self.db_url) as db_conn: - self.content_sha1_id = 'sha1-content0-6ee476a8be155ab049994f717e' - self.content_sha256_hex = 'sha256-content0-e476a8be155ab049994f717e' - models.add_content(db_conn, - self.content_sha1_id, - self.content_sha1_id, - self.content_sha256_hex, - 10) - - self.directory_sha1_hex = 'directory-sha1-ee476a8be155ab049994f717e' - models.add_directory(db_conn, self.directory_sha1_hex) - - authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} - models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) - - authorAndCommitter2 = {'name': 'tony', 'email': 'tony@dude.org'} - models.add_person(db_conn, authorAndCommitter2['name'], authorAndCommitter2['email']) - - self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' - models.add_revision(db_conn, - self.revision_sha1_hex, - now(), - self.directory_sha1_hex, - "revision message", - authorAndCommitter, - authorAndCommitter) - - self.revision_sha1_hex2 = 'revision-sha1-2-for-testing-put-occurr' - models.add_revision(db_conn, - self.revision_sha1_hex2, - now(), - self.directory_sha1_hex, - "revision message", - authorAndCommitter2, - authorAndCommitter2, - parent_shas=['revision-sha1-to-test-existence9994f717e']) - - self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' - models.add_release(db_conn, - self.release_sha1_hex, - self.revision_sha1_hex, - now(), - "0.0.1", - "Super release tagged by tony", - authorAndCommitter2) - - self.origin_url = "https://github.com/user/repo" - models.add_origin(db_conn, self.origin_url, 'git') - - models.add_occurrence(db_conn, - self.origin_url, - 'master', - self.revision_sha1_hex) - - def tearDown(self): - app_client_teardown(self.content_storage_dir) - - @istest - def post_all_non_presents_contents(self): - # given - - # when - payload = [self.content_sha1_id, - '555444f9dd5dc46ee476a8be155ab049994f717e', - '555444f9dd5dc46ee476a8be155ab049994f717e', - '666777f9dd5dc46ee476a8be155ab049994f717e'] - query_payload = serial.dumps(payload) - - rv = self.app.post('/vcs/contents/', - data=query_payload, - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 - - sha1s = serial.loads(rv.data) - assert len(sha1s) is 2 # only 2 sha1s - assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s - assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s - - @istest - def post_all_non_presents_directories(self): - # given - - # when - payload = [self.directory_sha1_hex, - '555444f9dd5dc46ee476a8be155ab049994f717e', - '555444f9dd5dc46ee476a8be155ab049994f717e', - '666777f9dd5dc46ee476a8be155ab049994f717e'] - query_payload = serial.dumps(payload) - - rv = self.app.post('/vcs/directories/', - data=query_payload, - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 - - sha1s = serial.loads(rv.data) - assert len(sha1s) is 2 # only 2 sha1s - assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s - assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s - - @istest - def post_all_non_presents_revisions(self): - # given - - # when - payload = [self.revision_sha1_hex, - self.revision_sha1_hex, - '555444f9dd5dc46ee476a8be155ab049994f717e', - '555444f9dd5dc46ee476a8be155ab049994f717e', - '666777f9dd5dc46ee476a8be155ab049994f717e'] - query_payload = serial.dumps(payload) - - rv = self.app.post('/vcs/revisions/', - data=query_payload, - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 - - sha1s = serial.loads(rv.data) - assert len(sha1s) is 2 # only 2 sha1s - assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s - assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s - - @istest - def post_all_non_presents_releases(self): - # given - - # when - payload = [self.release_sha1_hex, - self.release_sha1_hex, - '555444f9dd5dc46ee476a8be155ab049994f717e', - '555444f9dd5dc46ee476a8be155ab049994f717e', - '666777f9dd5dc46ee476a8be155ab049994f717e'] - query_payload = serial.dumps(payload) - - rv = self.app.post('/vcs/releases/', - data=query_payload, - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 400 - assert rv.data == b'Bad request. Type not supported!' - - @istest - def post_all_non_presents_occurrences_KO(self): - # given - - # when - payload = [self.revision_sha1_hex, - self.revision_sha1_hex, - '555444f9dd5dc46ee476a8be155ab049994f717e', - '555444f9dd5dc46ee476a8be155ab049994f717e', - '666777f9dd5dc46ee476a8be155ab049994f717e'] - query_payload = serial.dumps(payload) - - rv = self.app.post('/vcs/occurrences/', - data=query_payload, - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 400 - assert rv.data == b'Bad request. Type not supported!' - - @istest - def post_non_presents_objects_empty_payload_so_empty_results(self): - # given - - # when - for api_type in ['contents', 'directories', 'revisions']: - rv = self.app.post('/vcs/%s/' % api_type, - data=serial.dumps({}), - headers={'Content-Type': serial.MIMETYPE}) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data) == [] - - @istest - def post_non_presents_objects_bad_requests_format_pickle(self): - # given - - # when - for api_type in ['contents', 'directories', 'revisions']: - rv = self.app.post('/vcs/%s/' % api_type, - data="not pickle -> fail") - - # then - assert rv.status_code == 400 - assert rv.data == b'Bad request. Expected application/octet-stream data!' diff --git a/swh/loader/git/tests/test_api_release.py b/swh/loader/git/tests/test_api_release.py deleted file mode 100644 index 54f8a52..0000000 --- a/swh/loader/git/tests/test_api_release.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import now, app_client, app_client_teardown - - -@attr('slow') -class ReleaseTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, self.content_storage_dir = app_client() - - with db.connect(db_url) as db_conn: - self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' - models.add_directory(db_conn, self.directory_sha1_hex) - - self.tagAuthor = {'name': 'tony', 'email': 'tony@mail.org'} - models.add_person(db_conn, self.tagAuthor['name'], self.tagAuthor['email']) - - self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' - models.add_revision(db_conn, - self.revision_sha1_hex, - now(), - self.directory_sha1_hex, - "revision message", - self.tagAuthor, - self.tagAuthor) - - self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' - models.add_release(db_conn, - self.release_sha1_hex, - self.revision_sha1_hex, - now(), - "0.0.1", - "Super release tagged by tony", - self.tagAuthor) - - def tearDown(self): - app_client_teardown(self.content_storage_dir) - - @istest - def get_release_ok(self): - # when - rv = self.app.get('/vcs/releases/%s' % self.release_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == self.release_sha1_hex - - @istest - def get_release_not_found(self): - # when - rv = self.app.get('/vcs/releases/inexistant-sha1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def get_release_not_found_with_bad_format(self): - # when - rv = self.app.get('/vcs/releases/1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def put_release_create_and_update(self): - release_sha1_hex = 'sha1-release46ee476a8be155ab049994f717e' - - rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) - - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - # we create it - body = serial.dumps({'id': release_sha1_hex, - 'revision': self.revision_sha1_hex, - 'date': now(), - 'name': '0.0.1', - 'comment': 'super release tagged by ardumont', - 'author': self.tagAuthor}) - - rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # now it exists - rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == release_sha1_hex - - # we update it - rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # still the same - rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == release_sha1_hex diff --git a/swh/loader/git/tests/test_api_revision.py b/swh/loader/git/tests/test_api_revision.py deleted file mode 100644 index a605058..0000000 --- a/swh/loader/git/tests/test_api_revision.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh.loader.git.storage import db, models -from swh.loader.git.protocols import serial -from test_utils import now, app_client, app_client_teardown - - -@attr('slow') -class RevisionTestCase(unittest.TestCase): - def setUp(self): - self.app, db_url, self.content_storage_dir = app_client() - - with db.connect(db_url) as db_conn: - self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' - models.add_directory(db_conn, self.directory_sha1_hex) - - self.authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} - models.add_person(db_conn, self.authorAndCommitter['name'], self.authorAndCommitter['email']) - - self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' - models.add_revision(db_conn, - self.revision_sha1_hex, - now(), - self.directory_sha1_hex, - "revision message", - self.authorAndCommitter, - self.authorAndCommitter) - - def tearDown(self): - app_client_teardown(self.content_storage_dir) - - @istest - def get_revision_ok(self): - # when - rv = self.app.get('/vcs/revisions/%s' % self.revision_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == self.revision_sha1_hex - - @istest - def get_revision_not_found(self): - # when - rv = self.app.get('/vcs/revisions/inexistant-sha1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def get_revision_not_found_with_bad_format(self): - # when - rv = self.app.get('/vcs/revisions/1') - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - @istest - def put_revision_create_and_update(self): - revision_sha1_hex = 'sha1-revision46ee476a8be155ab049994f717e' - - rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) - - # then - assert rv.status_code == 404 - assert rv.data == b'Not found!' - - # we create it - body = serial.dumps({'date': now(), - 'directory': self.directory_sha1_hex, - 'message': 'revision message describing it', - 'committer': self.authorAndCommitter, - 'author': self.authorAndCommitter, - 'parent-sha1s': [self.revision_sha1_hex]}) - - rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # now it exists - rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == revision_sha1_hex - - # we update it - rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, - data=body, - headers={'Content-Type': serial.MIMETYPE}) - - assert rv.status_code == 204 - assert rv.data == b'' - - # still the same - rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) - - # then - assert rv.status_code == 200 - assert serial.loads(rv.data)['id'] == revision_sha1_hex diff --git a/swh/loader/git/tests/test_git_utils.py b/swh/loader/git/tests/test_git_utils.py deleted file mode 100644 index 7347c75..0000000 --- a/swh/loader/git/tests/test_git_utils.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import pygit2 - - -def create_blob(git_repo, blob_content): - """Create a blob with blob_content and returns its oid. - """ - return git_repo.create_blob(blob_content) - - -def create_tree(git_repo, blob_content=None): - """Create a tree. - If blob_content is specified, create a blob then - create a tree which points to this blob. - Returns the tree's oid. - """ - treeBuilder = git_repo.TreeBuilder() - if blob_content: - new_blob = create_blob(git_repo, blob_content) - treeBuilder.insert('blob', new_blob, - pygit2.GIT_FILEMODE_BLOB_EXECUTABLE) - return treeBuilder.write() - - -def create_author_and_committer(): - """Create a dummy signature for author and committer. - """ - author = pygit2.Signature('Alice Cooper', - 'alice@cooper.tld') - committer = pygit2.Signature('Vincent Furnier', - 'vincent@committers.tld') - return (author, committer) - -def create_tagger(): - """Create a dummy signature for author and committer. - """ - return pygit2.Signature('ToNyX', - 'tony@badass.org') - - -def create_commit_with_content(git_repo, - blob_content, - commit_msg, - commit_parents=None): - """Create a commit inside the git repository and return its oid. - """ - author, committer = create_author_and_committer() - tree = create_tree(git_repo, blob_content) - return git_repo.create_commit( - 'refs/heads/master', # the name of the reference to update - author, committer, commit_msg, - tree, # binary string representing the tree object ID - [] if commit_parents is None else commit_parents # commit parents - ) - -def create_tag(git_repo, name, commit, message): - """Create a dummy tag. - """ - return git_repo.create_tag(name, - commit.hex, - pygit2.GIT_OBJ_COMMIT, - create_tagger(), - message) diff --git a/swh/loader/git/tests/test_http.py b/swh/loader/git/tests/test_http.py deleted file mode 100644 index ba823de..0000000 --- a/swh/loader/git/tests/test_http.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest - -from swh.loader.git.client import http -from swh.loader.git.storage import storage - - -class TestHttp(unittest.TestCase): - @istest - def url(self): - # when - s = http.compute_simple_url('http://base-url', '/end') - - # then - assert s == 'http://base-url/end' - - @istest - def url_lookup_per_type(self): - # then - assert http.url_lookup_per_type == { storage.Type.origin: "/origins/" - , storage.Type.content: "/vcs/contents/" - , storage.Type.directory: "/vcs/directories/" - , storage.Type.revision: "/vcs/revisions/" } - - @istest - def url_store_per_type(self): - # then - assert http.url_store_per_type == { storage.Type.origin: "/origins/" - , storage.Type.content: "/vcs/contents/" - , storage.Type.directory: "/vcs/directories/" - , storage.Type.revision: "/vcs/revisions/" - , storage.Type.release: "/vcs/releases/" - , storage.Type.occurrence: "/vcs/occurrences/" - , storage.Type.person: "/vcs/persons/" - } diff --git a/swh/loader/git/tests/test_initdb.py b/swh/loader/git/tests/test_initdb.py deleted file mode 100644 index eb30c2a..0000000 --- a/swh/loader/git/tests/test_initdb.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from swh.loader.git.manager import manage - - -def prepare_db(db_url): - """DB fresh start. - """ - manage('cleandb', db_url) - manage('initdb', db_url) diff --git a/swh/loader/git/tests/test_local_loader.py b/swh/loader/git/tests/test_local_loader.py deleted file mode 100644 index 220edf8..0000000 --- a/swh/loader/git/tests/test_local_loader.py +++ /dev/null @@ -1,249 +0,0 @@ -# coding: utf-8 - -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest -import pygit2 -import tempfile -import shutil -import os - -from nose.plugins.attrib import attr -from nose.tools import istest - -from swh.loader.git.storage import db, models -from swh.loader.git import loader -from swh.loader.git.conf import reader - -import test_initdb -from test_utils import list_files_from -from test_git_utils import create_commit_with_content, create_tag - -@attr('slow') -class TestLocalLoader(unittest.TestCase): - def setUp(self): - """Initialize a git repository for the remaining test to manipulate. - """ - tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', - dir='/tmp') - self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) - - self.conf_back = reader.read('./resources/test/back.ini', - {'port': ('int', 9999)}) - - self.db_url = self.conf_back['db_url'] - - self.conf = { - 'action': 'load', - 'repo_path': self.tmp_git_repo.workdir, - 'backend-type': 'local', - 'backend': './resources/test/back.ini' - } - - def init_db_setup(self): - """Initialize a git repository for the remaining test to manipulate. - """ - test_initdb.prepare_db(self.db_url) - - def tearDown(self): - """Destroy the test git repository. - """ - shutil.rmtree(self.tmp_git_repo.workdir) - shutil.rmtree(self.conf_back['content_storage_dir'], ignore_errors=True) - - @istest - def should_fail_on_bad_action(self): - # when - try: - loader.load({'action': 'unknown'}) - except: - pass - - @istest - def should_fail_on_inexistant_folder(self): - # when - try: - loader.load({'action': 'load', - 'repo_path': 'something-that-definitely-does-not-exist'}) - except: - pass - - @istest - def should_fail_on_inexistant_backend_type(self): - # when - try: - loader.load({'action': 'load', - 'repo_path': '.', - 'backend-type': 'unknown'}) # only local or remote supported - except: - pass - - @istest - def local_loader(self): - """Trigger loader and make sure everything is ok. - """ - self.init_db_setup() - - # given - commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', - 'commit msg 0') - commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', - 'commit msg 1', - [commit0.hex]) - commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', - 'commit msg 2', - [commit1.hex]) - commit3 = create_commit_with_content(self.tmp_git_repo, None, - 'commit msg 3', - [commit2.hex]) - commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', - 'commit msg 4', - [commit3.hex]) - - # when - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 4, "4 blobs.") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 5, - "Should be 5 commits") - self.assertEquals( - models.count_directories(db_conn), - 5, - "Should be 5 trees") - self.assertEquals( - models.count_contents(db_conn), - 4, - "Should be 4 blobs as we created one commit without data!") - self.assertEquals( - models.count_release(db_conn), - 0, - "No tag created so 0 release.") - self.assertEquals( - models.count_occurrence(db_conn), - 1, - "Should be 1 reference (master) so 1 occurrence.") - - # given - commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', - 'commit msg 5', - [commit4.hex]) - commit6 = create_commit_with_content(self.tmp_git_repo, - 'new blob and last 6', - 'commit msg 6', - [commit5.hex]) - commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', - 'commit msg 7', - [commit6.hex]) - - # when - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 4+3, "3 new blobs.") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 8, - "Should be 5+3 == 8 commits now") - self.assertEquals( - models.count_directories(db_conn), - 8, - "Should be 5+3 == 8 trees") - self.assertEquals( - models.count_contents(db_conn), - 7, - "Should be 4+3 == 7 blobs") - self.assertEquals( - models.count_release(db_conn), - 0, - "No tag created so 0 release.") - self.assertEquals( - models.count_occurrence(db_conn), - 2, - "Should be 1 reference which changed twice so 2 occurrences (master changed).") - - # given - create_commit_with_content(self.tmp_git_repo, None, - 'commit 8 with parent 2', - [commit7.hex]) - - # when - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 7, "no new blob.") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 9, - "Should be 8+1 == 9 commits now") - self.assertEquals( - models.count_directories(db_conn), - 8, - "Should be 8 trees (new commit without blob so no new tree)") - self.assertEquals( - models.count_contents(db_conn), - 7, - "Should be 7 blobs (new commit without new blob)") - self.assertEquals( - models.count_release(db_conn), - 0, - "No tag created so 0 release.") - self.assertEquals( - models.count_occurrence(db_conn), - 3, - "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") - self.assertEquals( - models.count_person(db_conn), - 2, - "1 author + 1 committer") - - - # add tag - create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') - create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') - - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) - self.assertEquals(nb_files, 7, "no new blob.") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 9, - "Should be 8+1 == 9 commits now") - self.assertEquals( - models.count_directories(db_conn), - 8, - "Should be 8 trees (new commit without blob so no new tree)") - self.assertEquals( - models.count_contents(db_conn), - 7, - "Should be 7 blobs (new commit without new blob)") - self.assertEquals( - models.count_release(db_conn), - 2, - "Should be 2 annotated tags so 2 releases") - self.assertEquals( - models.count_occurrence(db_conn), - 3, - "master did not change this time so still 3 occurrences") - self.assertEquals( - models.count_person(db_conn), - 3, - "1 author + 1 committer + 1 tagger") diff --git a/swh/loader/git/tests/test_remote_loader.py b/swh/loader/git/tests/test_remote_loader.py deleted file mode 100644 index 971f83e..0000000 --- a/swh/loader/git/tests/test_remote_loader.py +++ /dev/null @@ -1,252 +0,0 @@ -# coding: utf-8 - -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest -import pygit2 -import tempfile -import shutil -import os - -from nose.plugins.attrib import attr -from nose.tools import istest - -from swh.loader.git.storage import db, models -from swh.loader.git import loader -from swh.loader.git.conf import reader - -import test_initdb -from test_git_utils import create_commit_with_content, create_tag -from test_utils import list_files_from - - -@attr('slow') -class TestRemoteLoader(unittest.TestCase): - def setUp(self): - tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', - dir='/tmp') - self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) - self.conf = reader.read('./resources/test/back.ini', - {'port': ('int', 9999)}) - - self.db_url = self.conf['db_url'] - self.conf.update({ - 'action': 'load', - 'repo_path': self.tmp_git_repo.workdir, - 'backend-type': 'remote', - 'backend': 'http://localhost:%s' % self.conf['port'] - }) - - # Not the remote loader in charge of creating the folder, so we do it - if not os.path.exists(self.conf['content_storage_dir']): - os.mkdir(self.conf['content_storage_dir']) - - def init_db_setup(self): - """Initialize a git repository for the remaining test to manipulate. - """ - test_initdb.prepare_db(self.db_url) - - def tearDown(self): - """Destroy the test git repository. - """ - shutil.rmtree(self.tmp_git_repo.workdir) - shutil.rmtree(self.conf['content_storage_dir']) - - @istest - def should_fail_on_bad_action(self): - # when - try: - loader.load({'action': 'unknown'}) - except: - # FIXME assert raises - pass - - @istest - def should_fail_on_inexistant_folder(self): - # when - try: - loader.load({'action': 'load', - 'repo_path': 'something-that-definitely-does-not-exist'}) - except: - pass - - @istest - def should_fail_on_inexistant_backend_type(self): - # when - try: - loader.load({'action': 'load', - 'repo_path': '.', - 'backend-type': 'unknown'}) # only local or remote supported - except: - pass - - @istest - def remote_loader(self): - """Trigger loader and make sure everything is ok. - """ - # given - self.init_db_setup() - - # given - commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', - 'commit msg 0') - commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', - 'commit msg 1', - [commit0.hex]) - commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', - 'commit msg 2', - [commit1.hex]) - commit3 = create_commit_with_content(self.tmp_git_repo, None, - 'commit msg 3', - [commit2.hex]) - commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', - 'commit msg 4', - [commit3.hex]) - - # when - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 4, "4 blobs") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 5, - "Should be 5 commits") - self.assertEquals( - models.count_directories(db_conn), - 5, - "Should be 5 trees") - self.assertEquals( - models.count_contents(db_conn), - 4, - "Should be 4 blobs as we created one commit without data!") - self.assertEquals( - models.count_release(db_conn), - 0, - "No tag created so 0 release.") - self.assertEquals( - models.count_occurrence(db_conn), - 1, - "Should be 1 reference (master) so 1 occurrence.") - - # given - commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', - 'commit msg 5', - [commit4.hex]) - commit6 = create_commit_with_content(self.tmp_git_repo, - 'new blob and last 6', - 'commit msg 6', - [commit5.hex]) - commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', - 'commit msg 7', - [commit6.hex]) - - # when - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 4+3, "3 new blobs") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 8, - "Should be 5+3 == 8 commits now") - self.assertEquals( - models.count_directories(db_conn), - 8, - "Should be 5+3 == 8 trees") - self.assertEquals( - models.count_contents(db_conn), - 7, - "Should be 4+3 == 7 blobs") - self.assertEquals( - models.count_release(db_conn), - 0, - "No tag created so 0 release.") - self.assertEquals( - models.count_occurrence(db_conn), - 2, - "Should be 1 reference which changed twice so 2 occurrences (master changed).") - - # given - create_commit_with_content(self.tmp_git_repo, None, - 'commit 8 with parent 2', - [commit7.hex]) - - # when - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 7, "no new blob") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 9, - "Should be 8+1 == 9 commits now") - self.assertEquals( - models.count_directories(db_conn), - 8, - "Should be 8 trees (new commit without blob so no new tree)") - self.assertEquals( - models.count_contents(db_conn), - 7, - "Should be 7 blobs (new commit without new blob)") - self.assertEquals( - models.count_release(db_conn), - 0, - "No tag created so 0 release.") - self.assertEquals( - models.count_occurrence(db_conn), - 3, - "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") - self.assertEquals( - models.count_person(db_conn), - 2, - "1 author + 1 committer") - - - # add tag - create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') - create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') - - loader.load(self.conf) - - # then - nb_files = len(list_files_from(self.conf['content_storage_dir'])) - self.assertEquals(nb_files, 7, "no new blob") - - with db.connect(self.db_url) as db_conn: - self.assertEquals( - models.count_revisions(db_conn), - 9, - "Should be 8+1 == 9 commits now") - self.assertEquals( - models.count_directories(db_conn), - 8, - "Should be 8 trees (new commit without blob so no new tree)") - self.assertEquals( - models.count_contents(db_conn), - 7, - "Should be 7 blobs (new commit without new blob)") - self.assertEquals( - models.count_release(db_conn), - 2, - "Should be 2 annotated tags so 2 releases") - self.assertEquals( - models.count_occurrence(db_conn), - 3, - "master did not change this time so still 3 occurrences") - self.assertEquals( - models.count_person(db_conn), - 3, - "1 author + 1 committer + 1 tagger") diff --git a/swh/loader/git/tests/test_swhrepo.py b/swh/loader/git/tests/test_swhrepo.py deleted file mode 100644 index 2555c90..0000000 --- a/swh/loader/git/tests/test_swhrepo.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest - -from nose.tools import istest - -from swh.loader.git.data import swhrepo - - -class SWHRepoTestCase(unittest.TestCase): - @istest - def new_swhrepo(self): - # when - r = swhrepo.SWHRepo() - - r.add_origin({'url': 'foobar'}) - - r.add_content({'id': 'some-con-sha1'}) - r.add_content({'id': 'some-con-sha1-2','stuff': 'some-stuff'}) - r.add_directory({'id': 'some-dir-sha1'}) - r.add_directory({'id': 'some-dir-sha1-2'}) - r.add_revision({'id': 'some-rev-sha1'}) - r.add_revision({'id': 'some-rev-sha1-2'}) - r.add_person('id0', {'name': 'the one'}) - r.add_person('id1', {'name': 'another one'}) - - r.add_occurrence({'id': 'some-occ-sha1'}) - r.add_release({'id': 'some-rel-sha1'}) - - # then - assert r.get_origin() == {'url': 'foobar'} - assert r.get_releases() == [{'id': 'some-rel-sha1'}] - assert r.get_occurrences() == [{'id': 'some-occ-sha1'}] - - for sha in ['some-con-sha1', 'some-con-sha1-2', - 'some-dir-sha1', 'some-dir-sha1-2', - 'some-rev-sha1', 'some-rev-sha1-2']: - assert r.already_visited(sha) is True - - assert r.already_visited('some-occ-sha1') is False - assert r.already_visited('some-rel-sha1') is False - - assert r.get_contents() == {'some-con-sha1': {'id': 'some-con-sha1'}, - 'some-con-sha1-2': {'id': 'some-con-sha1-2','stuff': 'some-stuff'}} - assert r.get_directories() == {'some-dir-sha1': {'id': 'some-dir-sha1'}, - 'some-dir-sha1-2': {'id': 'some-dir-sha1-2'}} - assert r.get_revisions() == {'some-rev-sha1': {'id': 'some-rev-sha1'}, - 'some-rev-sha1-2': {'id': 'some-rev-sha1-2'}} - - assert len(r.get_persons()) == 2 diff --git a/swh/loader/git/tests/test_utils.py b/swh/loader/git/tests/test_utils.py deleted file mode 100644 index f8edf1d..0000000 --- a/swh/loader/git/tests/test_utils.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import time -import os -import shutil -import tempfile - -from swh.loader.git.backend import api -from swh.storage.objstorage import ObjStorage - -import test_initdb - -def now(): - """Build the date as of now in the api's format. - - """ - return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) - - -def list_files_from(root_path): - """Compute the list of files from root_path. - - """ - f = [] - for (dirpath, dirnames, filenames) in os.walk(root_path): - f.extend(filenames) - return f - - -def app_client(db_url="dbname=softwareheritage-dev-test"): - """Setup the application ready for testing. - - """ - content_storage_dir = tempfile.mkdtemp(prefix='test-swh-loader-git.', - dir='/tmp') - folder_depth = 2 - api.app.config['conf'] = {'db_url': db_url, - 'content_storage_dir': content_storage_dir, - 'log_dir': '/tmp/swh-loader-git/log', - 'folder_depth': folder_depth, - 'debug': 'true', - 'objstorage': ObjStorage(content_storage_dir, - folder_depth) - } - - api.app.config['TESTING'] = True - app = api.app.test_client() - test_initdb.prepare_db(db_url) - return app, db_url, content_storage_dir - - -def app_client_teardown(content_storage_dir): - """Tear down app client's context. - - """ - shutil.rmtree(content_storage_dir, ignore_errors=True)