diff --git a/Makefile.tests b/Makefile.tests index 9d136ac..ee3627b 100644 --- a/Makefile.tests +++ b/Makefile.tests @@ -1,83 +1,83 @@ # -*- makefile -*- NOSEFLAGS=--nologcapture -v DB_TEST=$(DB)-test -TESTDIR = ./swh/tests +TESTDIR = ./swh/loader/git/tests test-connect-db: psql $(DB_TEST) test-create-db: cd $(SQL_FOLDER) && make clean initdb DBNAME=$(DB_TEST) test-drop-db: cd $(SQL_FOLDER) && make clean dropdb DBNAME=$(DB_TEST) test-cleandb: PYTHONPATH=$(_PYPATH) $(SWH_DB_MANAGER) $(FLAG) --config ./resources/test/db-manager.ini cleandb test-clean: rm -rf /tmp/swh-loader-git/test/ test-prepare: mkdir -p /tmp/swh-loader-git/test/ test-log-back: tail $(FOLLOW_LOG) /tmp/swh-loader-git/test/log/back.log test-check-meta: @echo "DB $(DB_TEST) metadata:" @$(BINDIR)/db-git-repo-meta.sh $(DB_TEST) @echo tests: PYTHONPATH=$(_PYPATH) make test test-run-back: PYTHONPATH=$(_PYPATH) $(SWH_BACK) $(FLAG) --config ./resources/test/back.ini test-http: $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_http.py test-swhrepo: $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_swhrepo.py test-api: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api*.py test-api-post-per-type: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_post_*.py test-api-content: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_content.py test-api-directory: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_directory.py test-api-revision: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_revision.py test-api-release: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_release.py test-api-occurrence: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_occurrence.py test-api-home: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_home.py test-api-origin: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_origin.py test-api-person: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_api_person.py test-file: 
PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_file.py test-remote-loader: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_remote_loader.py test-local-loader: PYTHONPATH=$(_PYPATH) $(NOSE) $(NOSEFLAGS) $(TESTDIR)/test_local_loader.py test-loaders: test-local-loader test-remote-loader diff --git a/bin/swh-backend b/bin/swh-backend index 561081c..0124211 100755 --- a/bin/swh-backend +++ b/bin/swh-backend @@ -1,58 +1,58 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os -from swh.backend import api -from swh.conf import reader +from swh.loader.git.backend import api +from swh.loader.git.conf import reader from swh.storage.objstorage import ObjStorage # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/back.ini' # default configuration DEFAULT_CONF = { 'content_storage_dir' : ('string', '/tmp/swh-loader-git/content-storage'), 'log_dir' : ('string', '/tmp/swh-loader-git/log'), 'db_url' : ('string', 'dbname=softwareheritage-dev'), 'folder_depth' : ('int' , 4), 'debug' : ('bool' , None), 'host' : ('string', '127.0.0.1'), 'port' : ('int' , 5000) } def parse_args(): """Parse the configuration for the cli. 
""" cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') args = cli.parse_args() return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir', 'content_storage_dir') conf.update({ 'objstorage': ObjStorage(conf['content_storage_dir'], conf['folder_depth']) }) logging.basicConfig(filename=os.path.join(conf['log_dir'], 'back.log'), level=logging.DEBUG if args.verbose else logging.INFO) api.run(conf) diff --git a/bin/swh-db-manager b/bin/swh-db-manager index b1eb35e..a690114 100755 --- a/bin/swh-db-manager +++ b/bin/swh-db-manager @@ -1,56 +1,56 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os -from swh import manager -from swh.conf import reader +from swh.loader.git import manager +from swh.loader.git.conf import reader # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/db-manager.ini' # default configuration (can be overriden by the DEFAULT_CONF_FILE) DEFAULT_CONF = { 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'db_url' : ('string', 'dbname=softwareheritage-dev') } def parse_args(): """Parse the configuration for the cli. 
""" cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') subcli = cli.add_subparsers(dest='action') subcli.add_parser('initdb', help='initialize DB') subcli.add_parser('cleandb', help='clean DB') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir') logging.basicConfig(filename=os.path.join(conf['log_dir'], 'db-manager.log'), level=logging.DEBUG if args.verbose else logging.INFO) manager.manage(args.action, conf['db_url']) diff --git a/bin/swh-loader-git b/bin/swh-loader-git index 27fdcab..268820f 100755 --- a/bin/swh-loader-git +++ b/bin/swh-loader-git @@ -1,67 +1,67 @@ #!/usr/bin/env python3 # Copyright (C) 2015 Stefano Zacchiroli , # Antoine R. Dumont # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import argparse import logging import os -from swh.gitloader import loader -from swh.conf import reader +from swh.loader.git.gitloader import loader +from swh.loader.git.conf import reader # Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/loader-git.ini' # default configuration (can be overriden by the DEFAULT_CONF_FILE) DEFAULT_CONF = { 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'backend-type': ('string', 'remote'), 'backend': ('string', 'http://localhost:5000'), } # Another example of configuration: # DEFAULT_CONF = { # 'log_dir': ('string', '/tmp/swh-loader-git/log'), # 'backend-type': ('string', 'local'), # 'backend': ('string', '~/.config/swh/back.ini'), # } def parse_args(): """Parse the CLI arguments. 
""" cli = argparse.ArgumentParser( description='Parse git repository objects to load them into DB.') cli.add_argument('--verbose', '-v', action='store_true', help='Verbosity level in log file.') cli.add_argument('--config', '-c', help='configuration file path') subcli = cli.add_subparsers(dest='action') load_cli = subcli.add_parser('load', help='load Git repo into DB') load_cli.add_argument('repository', help='Git repository path') args = cli.parse_args() if not args.action: cli.error('no action given') return args if __name__ == '__main__': args = parse_args() conf = reader.read(args.config or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'log_dir') conf['action'] = args.action conf['repo_path'] = args.repository logging.basicConfig(filename=os.path.join(conf['log_dir'], 'sgloader.log'), level=logging.DEBUG if args.verbose else logging.INFO) loader.load(conf) diff --git a/swh/backend/__init__.py b/swh/loader/__init__.py similarity index 100% copy from swh/backend/__init__.py copy to swh/loader/__init__.py diff --git a/swh/client/__init__.py b/swh/loader/git/__init__.py similarity index 100% rename from swh/client/__init__.py rename to swh/loader/git/__init__.py diff --git a/swh/data/__init__.py b/swh/loader/git/backend/__init__.py similarity index 100% rename from swh/data/__init__.py rename to swh/loader/git/backend/__init__.py diff --git a/swh/backend/api.py b/swh/loader/git/backend/api.py similarity index 98% rename from swh/backend/api.py rename to swh/loader/git/backend/api.py index 5488ad7..2cbb1ae 100755 --- a/swh/backend/api.py +++ b/swh/loader/git/backend/api.py @@ -1,254 +1,254 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from flask import Flask, Response, make_response, request -from swh.store import 
store, db, service -from swh.protocols import serial +from swh.loader.git.store import store, db, service +from swh.loader.git.protocols import serial # api's definition app = Flask(__name__) def read_request_payload(request): """Read the request's payload. """ # TODO: Check the signed pickled data? return serial.load(request.stream) def write_response(data): """Write response from data. """ return Response(serial.dumps(data), mimetype=serial.MIMETYPE) @app.route('/') def hello(): """A simple api to define what the server is all about. FIXME: A redirect towards a static page defining the routes would be nice. """ return 'Dev SWH API' # from uri to type _uri_types = {'revisions': store.Type.revision, 'directories': store.Type.directory, 'contents': store.Type.content, 'releases': store.Type.release, 'occurrences': store.Type.occurrence, 'persons': store.Type.person} def _do_action_with_payload(conf, action_fn, uri_type, id, map_result_fn): uri_type_ok = _uri_types.get(uri_type, None) if uri_type_ok is None: return make_response('Bad request!', 400) vcs_object = read_request_payload(request) vcs_object.update({'id': id, 'type': uri_type_ok}) return action_fn(conf, vcs_object, map_result_fn) # occurrence type is not dealt the same way _post_all_uri_types = {'revisions': store.Type.revision, 'directories': store.Type.directory, 'contents': store.Type.content} @app.route('/vcs//', methods=['POST']) def filter_unknowns_type(uri_type): """Filters unknown sha1 to the backend and returns them. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) obj_type = _post_all_uri_types.get(uri_type) if obj_type is None: return make_response('Bad request. 
Type not supported!', 400) sha1s = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: unknowns_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) if unknowns_sha1s is None: return make_response('Bad request!', 400) else: return write_response(unknowns_sha1s) @app.route('/vcs/persons/', methods=['POST']) def post_person(): """Find a person. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: person_found = service.find_person(db_conn, origin) if person_found: return write_response(person_found) else: return make_response('Person not found!', 404) except: return make_response('Bad request!', 400) @app.route('/origins/', methods=['POST']) def post_origin(): """Find an origin. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: origin_found = service.find_origin(db_conn, origin) if origin_found: return write_response(origin_found) else: return make_response('Origin not found!', 404) except: return make_response('Bad request!', 400) @app.route('/origins/', methods=['PUT']) def put_origin(): """Create an origin or returns it if already existing. """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' 
% serial.MIMETYPE, 400) origin = read_request_payload(request) config = app.config['conf'] with db.connect(config['db_url']) as db_conn: try: origin_found = service.add_origin(db_conn, origin) return write_response(origin_found) # FIXME: 204 except: return make_response('Bad request!', 400) @app.route('/vcs//', methods=['PUT']) def put_all(uri_type): """Store or update given objects (uri_type in {contents, directories, releases). """ if request.headers.get('Content-Type') != serial.MIMETYPE: return make_response('Bad request. Expected %s data!' % serial.MIMETYPE, 400) payload = read_request_payload(request) obj_type = _uri_types[uri_type] config = app.config['conf'] with db.connect(config['db_url']) as db_conn: service.persist(db_conn, config, obj_type, payload) return make_response('Successful creation!', 204) def add_object(config, vcs_object, map_result_fn): """Add object in storage. - config is the configuration needed for the backend to execute query - vcs_object is the object to look for in the backend - map_result_fn is a mapping function which takes the backend's result and transform its output accordingly. This function returns an http response of the result. """ type = vcs_object['type'] id = vcs_object['id'] logging.debug('store %s %s' % (type, id)) with db.connect(config['db_url']) as db_conn: res = service.add_objects(db_conn, config, type, [vcs_object]) return make_response(map_result_fn(id, res), 204) def _do_lookup(conf, uri_type, id, map_result_fn): """Looking up type object with sha1. - config is the configuration needed for the backend to execute query - vcs_object is the object to look for in the backend - map_result_fn is a mapping function which takes the backend's result and transform its output accordingly. This function returns an http response of the result. 
""" uri_type_ok = _uri_types.get(uri_type, None) if not uri_type_ok: return make_response('Bad request!', 400) with db.connect(conf['db_url']) as db_conn: res = store.find(db_conn, id, uri_type_ok) if res: return write_response(map_result_fn(id, res)) # 200 return make_response('Not found!', 404) @app.route('/vcs/occurrences/') def list_occurrences_for(id): """Return the occurrences pointing to the revision id. """ return _do_lookup(app.config['conf'], 'occurrences', id, lambda _, result: list(map(lambda col: col[1], result))) @app.route('/vcs//') def object_exists_p(uri_type, id): """Assert if the object with sha1 id, of type uri_type, exists. """ return _do_lookup(app.config['conf'], uri_type, id, lambda sha1, _: {'id': sha1}) @app.route('/vcs//', methods=['PUT']) def put_object(uri_type, id): """Put an object in storage. """ return _do_action_with_payload(app.config['conf'], add_object, uri_type, id, lambda sha1, _2: sha1) # FIXME: use id or result instead def run(conf): """Run the api's server. 
conf is a dictionary of keywords: - 'db_url' the db url's access (through psycopg2 format) - 'content_storage_dir' revisions/directories/contents storage on disk - 'host' to override the default 127.0.0.1 to open or not the server to the world - 'port' to override the default of 5000 (from the underlying layer: flask) - 'debug' activate the verbose logs """ print("""SWH Api run host: %s port: %s debug: %s""" % (conf['host'], conf.get('port', None), conf['debug'])) # app.config is the app's state (accessible) app.config.update({'conf': conf}) app.run(host=conf['host'], port=conf.get('port', None), debug=conf['debug'] == 'true') diff --git a/swh/gitloader/__init__.py b/swh/loader/git/client/__init__.py similarity index 100% rename from swh/gitloader/__init__.py rename to swh/loader/git/client/__init__.py diff --git a/swh/client/http.py b/swh/loader/git/client/http.py similarity index 95% rename from swh/client/http.py rename to swh/loader/git/client/http.py index 71b18a5..7157142 100755 --- a/swh/client/http.py +++ b/swh/loader/git/client/http.py @@ -1,86 +1,86 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import requests from retrying import retry -from swh.retry import policy -from swh.store import store -from swh.protocols import serial +from swh.loader.git.retry import policy +from swh.loader.git.store import store +from swh.loader.git.protocols import serial session_swh = requests.Session() def compute_simple_url(base_url, type): """Compute the api url. """ return '%s%s' % (base_url, type) @retry(retry_on_exception=policy.retry_if_connection_error, wrap_exception=True, stop_max_attempt_number=3) def execute(map_type_url, method_fn, base_url, obj_type, data, result_fn=lambda result: result.ok): """Execute a query to the backend. 
- map_type_url is a map of {type: url backend} - method_fn is swh_session.post or swh_session.put - base_url is the base url of the backend - obj_type is the nature of the data - data is the data to send to the backend - result_fn is a function which takes the response result and do something with it. The default function is to return if the server is ok or not. """ if not data: return data res = method_fn(compute_simple_url(base_url, map_type_url[obj_type]), data=serial.dumps(data), headers={'Content-Type': serial.MIMETYPE}) return result_fn(res) # url mapping for lookup url_lookup_per_type = { store.Type.origin: "/origins/" , store.Type.content: "/vcs/contents/" , store.Type.directory: "/vcs/directories/" , store.Type.revision: "/vcs/revisions/" } def post(base_url, obj_type, obj_sha1s): """Retrieve the objects of type type with sha1 sha1hex. """ return execute(url_lookup_per_type, session_swh.post, base_url, obj_type, obj_sha1s, result_fn=lambda res: serial.loads(res.content)) # url mapping for storage url_store_per_type = { store.Type.origin: "/origins/" , store.Type.content: "/vcs/contents/" , store.Type.directory: "/vcs/directories/" , store.Type.revision: "/vcs/revisions/" , store.Type.release: "/vcs/releases/" , store.Type.occurrence: "/vcs/occurrences/" , store.Type.person: "/vcs/persons/" } def put(base_url, obj_type, obj): """Given an obj (map, simple object) of obj_type, PUT it in the backend. 
""" return execute(url_store_per_type, session_swh.put, base_url, obj_type, obj) diff --git a/swh/conf/__init__.py b/swh/loader/git/conf/__init__.py similarity index 100% rename from swh/conf/__init__.py rename to swh/loader/git/conf/__init__.py diff --git a/swh/conf/reader.py b/swh/loader/git/conf/reader.py similarity index 100% rename from swh/conf/reader.py rename to swh/loader/git/conf/reader.py diff --git a/swh/protocols/__init__.py b/swh/loader/git/data/__init__.py similarity index 100% rename from swh/protocols/__init__.py rename to swh/loader/git/data/__init__.py diff --git a/swh/data/swhrepo.py b/swh/loader/git/data/swhrepo.py similarity index 100% rename from swh/data/swhrepo.py rename to swh/loader/git/data/swhrepo.py diff --git a/swh/retry/__init__.py b/swh/loader/git/gitloader/__init__.py similarity index 100% rename from swh/retry/__init__.py rename to swh/loader/git/gitloader/__init__.py diff --git a/swh/gitloader/git.py b/swh/loader/git/gitloader/git.py similarity index 99% rename from swh/gitloader/git.py rename to swh/loader/git/gitloader/git.py index d85e0d9..d500ce0 100644 --- a/swh/gitloader/git.py +++ b/swh/loader/git/gitloader/git.py @@ -1,225 +1,225 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import glob import logging import os import subprocess import time import pygit2 from datetime import datetime from pygit2 import GIT_REF_OID from pygit2 import GIT_OBJ_COMMIT, GIT_OBJ_TREE, GIT_SORT_TOPOLOGICAL from enum import Enum from swh.core import hashutil -from swh.data import swhrepo -from swh.store import store +from swh.loader.git.data import swhrepo +from swh.loader.git.store import store class DirectoryTypeEntry(Enum): """Types of git objects. 
""" file = 'file' directory = 'directory' def date_format(d): """d is expected to be a datetime object. """ return time.strftime("%a, %d %b %Y %H:%M:%S +0000", d.timetuple()) def now(): """Cheat time values.""" return date_format(datetime.utcnow()) def timestamp_to_string(timestamp): """Convert a timestamps to string. """ return date_format(datetime.utcfromtimestamp(timestamp)) def list_objects_from_packfile_index(packfile_index): """List the objects indexed by this packfile""" input_file = open(packfile_index, 'rb') with subprocess.Popen( ['/usr/bin/git', 'show-index'], stdin=input_file, stdout=subprocess.PIPE, ) as process: for line in process.stdout.readlines(): obj_id = line.decode('utf-8', 'ignore').split()[1] yield obj_id def list_objects(repo): """List the objects in a given repository""" objects_dir = os.path.join(repo.path, 'objects') objects_glob = os.path.join(objects_dir, '[0-9a-f]' * 2, '[0-9a-f]' * 38) packfile_dir = os.path.join(objects_dir, 'pack') if os.path.isdir(packfile_dir): for packfile_index in os.listdir(packfile_dir): if not packfile_index.endswith('.idx'): # Not an index file continue packfile_index_path = os.path.join(packfile_dir, packfile_index) yield from list_objects_from_packfile_index(packfile_index_path) for object_file in glob.glob(objects_glob): yield ''.join(object_file.split(os.path.sep)[-2:]) HASH_ALGORITHMS=['sha1', 'sha256'] def parse(repo_path): """Given a repository path, parse and return a memory model of such repository.""" def read_signature(signature): return '%s <%s>' % (signature.name, signature.email) def treewalk(repo, tree): """Walk a tree with the same implementation as `os.path`. 
Returns: tree, trees, blobs """ trees, blobs, dir_entries = [], [], [] for tree_entry in tree: if swh_repo.already_visited(tree_entry.hex): logging.debug('tree_entry %s already visited, skipped' % tree_entry.hex) continue obj = repo.get(tree_entry.oid) if obj is None: # or obj.type == GIT_OBJ_COMMIT: logging.warn('skip submodule-commit %s' % tree_entry.hex) continue # submodule! if obj.type == GIT_OBJ_TREE: logging.debug('found tree %s' % tree_entry.hex) nature = DirectoryTypeEntry.directory.value trees.append(tree_entry) else: logging.debug('found content %s' % tree_entry.hex) data = obj.data nature = DirectoryTypeEntry.file.value hashes = hashutil.hashdata(data, HASH_ALGORITHMS) blobs.append({'id': obj.hex, 'type': store.Type.content, 'content-sha1': hashes['sha1'], 'content-sha256': hashes['sha256'], 'content': data, 'size': obj.size}) dir_entries.append({'name': tree_entry.name, 'type': store.Type.directory_entry, 'target-sha1': obj.hex, 'nature': nature, 'perms': tree_entry.filemode, 'atime': None, 'mtime': None, 'ctime': None, 'parent': tree.hex}) yield tree, dir_entries, trees, blobs for tree_entry in trees: for x in treewalk(repo, repo[tree_entry.oid]): yield x def walk_tree(repo, swh_repo, rev): """Walk the rev revision's directories. 
""" if swh_repo.already_visited(rev.hex): logging.debug('commit %s already visited, skipped' % rev.hex) return swh_repo for dir_root, dir_entries, _, contents_ref in treewalk(repo, rev.tree): for content_ref in contents_ref: swh_repo.add_content(content_ref) swh_repo.add_directory({'id': dir_root.hex, 'type': store.Type.directory, 'entries': dir_entries}) revision_parent_sha1s = list(map(str, rev.parent_ids)) author = {'name': rev.author.name, 'email': rev.author.email, 'type': store.Type.person} committer = {'name': rev.committer.name, 'email': rev.committer.email, 'type': store.Type.person} swh_repo.add_revision({'id': rev.hex, 'type':store.Type.revision, 'date': timestamp_to_string(rev.commit_time), 'directory': rev.tree.hex, 'message': rev.message, 'committer': committer, 'author': author, 'parent-sha1s': revision_parent_sha1s }) swh_repo.add_person(read_signature(rev.author), author) swh_repo.add_person(read_signature(rev.committer), committer) return swh_repo def walk_revision_from(repo, swh_repo, head_rev): """Walk the rev history log from head_rev. - repo is the current repository - rev is the latest rev to start from. 
""" for rev in repo.walk(head_rev.id, GIT_SORT_TOPOLOGICAL): swh_repo = walk_tree(repo, swh_repo, rev) return swh_repo repo = pygit2.Repository(repo_path) # memory model swh_repo = swhrepo.SWHRepo() # add origin origin = {'type': 'git', 'url': 'file://' + repo.path} swh_repo.add_origin(origin) # add references and crawl them for ref_name in repo.listall_references(): logging.info('walk reference %s' % ref_name) ref = repo.lookup_reference(ref_name) head_rev = repo[ref.target] \ if ref.type is GIT_REF_OID \ else ref.peel(GIT_OBJ_COMMIT) # noqa if isinstance(head_rev, pygit2.Tag): head_start = head_rev.get_object() taggerSig = head_rev.tagger author = {'name': taggerSig.name, 'email': taggerSig.email, 'type': store.Type.person} release = {'id': head_rev.hex, 'type': store.Type.release, 'revision': head_rev.target.hex, 'name': ref_name, 'date': now(), # FIXME: find the tag's date, 'author': author, 'comment': head_rev.message} swh_repo.add_release(release) swh_repo.add_person(read_signature(taggerSig), author) else: swh_repo.add_occurrence({'id': head_rev.hex, 'revision': head_rev.hex, 'reference': ref_name, 'url-origin': origin['url'], 'type': store.Type.occurrence}) head_start = head_rev # crawl commits and trees walk_revision_from(repo, swh_repo, head_start) return swh_repo diff --git a/swh/gitloader/loader.py b/swh/loader/git/gitloader/loader.py similarity index 95% rename from swh/gitloader/loader.py rename to swh/loader/git/gitloader/loader.py index d222ee0..4dfce9d 100644 --- a/swh/gitloader/loader.py +++ b/swh/loader/git/gitloader/loader.py @@ -1,53 +1,53 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os -from swh.gitloader import git, remote_store, local_store +from swh.loader.git.gitloader import git, remote_store, local_store 
_load_to_back_fn = {'remote': remote_store.load_to_back ,'local': local_store.prepare_and_load_to_back } def check_user_conf(conf): """Check the user's configuration and rejects if problems. """ action = conf['action'] if action != 'load': return 'skip unknown action %s' % action backend_type = conf['backend-type'] if backend_type not in _load_to_back_fn: return 'skip unknown backend-type %s (only `remote`, `local` supported)' % backend_type repo_path = conf['repo_path'] if not os.path.exists(repo_path): return 'Repository %s does not exist.' % repo_path return None def load(conf): """According to action, load the repo_path. used configuration keys: - action: requested action - repo_path: git repository path ('load' action only) - backend-type: backend access's type (remote or local) - backend: url access to backend api """ error_msg = check_user_conf(conf) if error_msg: logging.error(error_msg) raise Exception(error_msg) repo_path = conf['repo_path'] logging.info('load repo_path %s' % repo_path) swhrepo = git.parse(repo_path) _load_to_back_fn[conf['backend-type']](conf['backend'], swhrepo) diff --git a/swh/gitloader/local_store.py b/swh/loader/git/gitloader/local_store.py similarity index 97% rename from swh/gitloader/local_store.py rename to swh/loader/git/gitloader/local_store.py index 7bff576..3ce88ad 100644 --- a/swh/gitloader/local_store.py +++ b/swh/loader/git/gitloader/local_store.py @@ -1,95 +1,95 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.store import store, db, service -from swh.conf import reader +from swh.loader.git.store import store, db, service +from swh.loader.git.conf import reader from swh.storage.objstorage import ObjStorage # FIXME: duplicated from bin/swh-backend... 
# Default configuration file DEFAULT_CONF_FILE = '~/.config/swh/back.ini' # default configuration DEFAULT_CONF = { 'content_storage_dir': ('string', '/tmp/swh-loader-git/content-storage'), 'log_dir': ('string', '/tmp/swh-loader-git/log'), 'db_url': ('string', 'dbname=softwareheritage-dev'), 'folder_depth': ('int', 4), 'debug': ('bool', None), 'host': ('string', '127.0.0.1'), 'port': ('int', 5000) } def store_only_new(db_conn, conf, obj_type, obj): """Store object if not already present. """ if not store.find(db_conn, obj['id'], obj_type): store.add(db_conn, conf, obj) _obj_to_persist_fn = {store.Type.revision: service.add_revisions} def store_unknown_objects(db_conn, conf, obj_type, swhmap): """Load objects to the backend. """ sha1s = swhmap.keys() # have: filter unknown obj unknown_obj_sha1s = service.filter_unknowns_type(db_conn, obj_type, sha1s) if not unknown_obj_sha1s: return True # seen: now store in backend persist_fn = _obj_to_persist_fn.get(obj_type, service.add_objects) obj_fulls = map(swhmap.get, unknown_obj_sha1s) return persist_fn(db_conn, conf, obj_type, obj_fulls) def load_to_back(conf, swh_repo): """Load to the backend the repository swh_repo. 
""" with db.connect(conf['db_url']) as db_conn: # First, store/retrieve the origin identifier # FIXME: should be done by the cloner worker (which is not yet plugged # on the right swh db ftm) service.add_origin(db_conn, swh_repo.get_origin()) # First reference all unknown persons service.add_persons(db_conn, conf, store.Type.person, swh_repo.get_persons()) res = store_unknown_objects(db_conn, conf, store.Type.content, swh_repo.get_contents()) if res: res = store_unknown_objects(db_conn, conf, store.Type.directory, swh_repo.get_directories()) if res: res = store_unknown_objects(db_conn, conf, store.Type.revision, swh_repo.get_revisions()) if res: # brutally send all remaining occurrences service.add_objects(db_conn, conf, store.Type.occurrence, swh_repo.get_occurrences()) # and releases (the idea here is that compared to existing # objects, the quantity is less) service.add_objects(db_conn, conf, store.Type.release, swh_repo.get_releases()) def prepare_and_load_to_back(backend_setup_file, swh_repo): # Read the configuration file (no check yet) conf = reader.read(backend_setup_file or DEFAULT_CONF_FILE, DEFAULT_CONF) reader.prepare_folders(conf, 'content_storage_dir') conf.update({ 'objstorage': ObjStorage(conf['content_storage_dir'], conf['folder_depth']) }) load_to_back(conf, swh_repo) diff --git a/swh/gitloader/remote_store.py b/swh/loader/git/gitloader/remote_store.py similarity index 96% rename from swh/gitloader/remote_store.py rename to swh/loader/git/gitloader/remote_store.py index 5028ef0..e90a396 100644 --- a/swh/gitloader/remote_store.py +++ b/swh/loader/git/gitloader/remote_store.py @@ -1,63 +1,63 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.store import store -from swh.client import http +from swh.loader.git.store import store +from 
swh.loader.git.client import http def store_unknown_objects(back_url, obj_type, swhmap): """Load objects to the backend. """ sha1s = list(swhmap.keys()) # have: filter unknown obj unknown_obj_sha1s = http.post(back_url, obj_type, sha1s) if not unknown_obj_sha1s: return True # store unknown objects return http.put(back_url, obj_type, map(swhmap.get, unknown_obj_sha1s)) def load_to_back(back_url, swh_repo): """Load to the back_url the repository swh_repo. """ # First, store/retrieve the origin identifier # FIXME: should be done by the cloner worker (which is not yet plugged on # the right swh db ftm) http.put(back_url, obj_type=store.Type.origin, obj=swh_repo.get_origin()) http.put(back_url, obj_type=store.Type.person, obj=list(swh_repo.get_persons())) - + # let the backend and api discuss what's really needed # - first this worker sends the checksums # - then the backend answers the checksums it does not know # - then the worker sends only what the backend does not know per # object type basis res = store_unknown_objects(back_url, store.Type.content, swh_repo.get_contents()) if res: res = store_unknown_objects(back_url, store.Type.directory, swh_repo.get_directories()) if res: res = store_unknown_objects(back_url, store.Type.revision, swh_repo.get_revisions()) if res: # brutally send all remaining occurrences http.put(back_url, store.Type.occurrence, swh_repo.get_occurrences()) # and releases (the idea here is that compared to existing # other objects, the quantity is less) http.put(back_url, store.Type.release, swh_repo.get_releases()) # FIXME: deal with collision failures which should be raised by backend. 
diff --git a/swh/manager.py b/swh/loader/git/manager.py similarity index 94% rename from swh/manager.py rename to swh/loader/git/manager.py index 4d98121..18dba46 100755 --- a/swh/manager.py +++ b/swh/loader/git/manager.py @@ -1,27 +1,27 @@ #!/usr/bin/env python3 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging -from swh.store import db, models +from swh.loader.git.store import db, models def manage(action, db_url): """According to action, load the repository. used configuration keys: - action: requested action [cleandb|initdb] """ with db.connect(db_url) as db_conn: if action == 'cleandb': logging.info('clean database') models.cleandb(db_conn) elif action == 'initdb': logging.info('initialize database') models.initdb(db_conn) else: logging.warn('skip unknown-action %s' % action) diff --git a/swh/backend/__init__.py b/swh/loader/git/protocols/__init__.py similarity index 100% copy from swh/backend/__init__.py copy to swh/loader/git/protocols/__init__.py diff --git a/swh/protocols/serial.py b/swh/loader/git/protocols/serial.py similarity index 100% rename from swh/protocols/serial.py rename to swh/loader/git/protocols/serial.py diff --git a/swh/backend/__init__.py b/swh/loader/git/retry/__init__.py similarity index 100% rename from swh/backend/__init__.py rename to swh/loader/git/retry/__init__.py diff --git a/swh/retry/policy.py b/swh/loader/git/retry/policy.py similarity index 100% rename from swh/retry/policy.py rename to swh/loader/git/retry/policy.py diff --git a/swh/store/__init__.py b/swh/loader/git/store/__init__.py similarity index 100% rename from swh/store/__init__.py rename to swh/loader/git/store/__init__.py diff --git a/swh/store/db.py b/swh/loader/git/store/db.py similarity index 100% rename from swh/store/db.py rename to 
swh/loader/git/store/db.py diff --git a/swh/store/models.py b/swh/loader/git/store/models.py similarity index 98% rename from swh/store/models.py rename to swh/loader/git/store/models.py index e80df24..1770ef9 100644 --- a/swh/store/models.py +++ b/swh/loader/git/store/models.py @@ -1,285 +1,285 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from enum import Enum -from swh.store import db +from . import db class Type(Enum): """Types of git objects. """ occurrence = 'occurrence' # ~git branch release = 'release' # ~git annotated tag revision = 'revision' # ~git commit directory = 'directory' # ~git tree directory_entry = 'directory_entry' # ~git tree_entry content = 'content' # ~git blob origin = 'origin' person = 'person' # committer, tagger, author def initdb(db_conn): """For retrocompatibility. """ pass def cleandb(db_conn): db.queries_execute(db_conn, ['TRUNCATE TABLE release CASCADE', 'TRUNCATE TABLE revision CASCADE', 'TRUNCATE TABLE directory CASCADE', 'TRUNCATE TABLE content CASCADE', 'TRUNCATE TABLE occurrence_history CASCADE', 'TRUNCATE TABLE occurrence CASCADE', 'TRUNCATE TABLE origin CASCADE', 'TRUNCATE TABLE person CASCADE', ]) def add_origin(db_conn, url, type, parent=None): """Insert origin and returns the newly inserted id. """ return db.insert(db_conn, ("""INSERT INTO origin (type, url, parent_id) VALUES (%s, %s, %s) RETURNING id""", (type, url, parent))) def add_person(db_conn, name, email): """Insert author and returns the newly inserted id. """ return db.insert(db_conn, ("""INSERT INTO person (name, email) VALUES (%s, %s) RETURNING id""", (name, email))) def add_content(db_conn, sha1, sha1_content, sha256_content, size): """Insert a new content. 
""" db.query_execute(db_conn, ("""INSERT INTO content (id, sha1, sha256, length) VALUES (%s, %s, %s, %s)""", (sha1, sha1_content, sha256_content, size))) def add_directory(db_conn, obj_sha): """Insert a new directory. """ db.query_execute(db_conn, ("""INSERT INTO directory (id) VALUES (%s)""", (obj_sha,))) def add_directory_entry(db_conn, name, sha, type, perms, atime, mtime, ctime, parent): """Insert a new directory. """ db.query_execute(db_conn, ("""INSERT INTO directory_entry (name, id, type, perms, atime, mtime, ctime, directory) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)""", (name, sha, type, perms, atime, mtime, ctime, parent))) def add_revision(db_conn, sha, date, directory, message, author, committer, parent_shas=None): """Insert a revision. """ db.query_execute(db_conn, ("""INSERT INTO revision (id, date, directory, message, author, committer) - VALUES (%s, %s, %s, %s, + VALUES (%s, %s, %s, %s, (select id from person where name=%s and email=%s), (select id from person where name=%s and email=%s))""", (sha, date, directory, message, author['name'], author['email'], committer['name'], committer['email']))) def add_revision_history(db_conn, couple_parents): """Store the revision history graph. """ tuples = ','.join(["('%s','%s')" % couple for couple in couple_parents]) query = 'INSERT INTO revision_history (id, parent_id) VALUES ' + tuples db.query_execute(db_conn, query) def add_release(db_conn, obj_sha, revision, date, name, comment, author): """Insert a release. """ db.query_execute(db_conn, ("""INSERT INTO release (id, revision, date, name, comment, author) - VALUES (%s, %s, %s, %s, %s, + VALUES (%s, %s, %s, %s, %s, (select id from person where name=%s and email=%s))""", (obj_sha, revision, date, name, comment, author['name'], author['email']))) def add_occurrence(db_conn, url_origin, reference, revision): """Insert an occurrence. Check if occurrence history already present. 
If present do nothing, otherwise insert """ with db_conn.cursor() as cur: occ = find_occurrence(cur, reference, revision, url_origin) if not occ: db.execute( cur, ("""INSERT INTO occurrence (origin, reference, revision) VALUES ((select id from origin where url=%s), %s, %s)""", (url_origin, reference, revision))) def find_revision(db_conn, obj_sha): """Find a revision by its obj_sha. """ return find_object(db_conn, obj_sha, Type.revision) def find_directory(db_conn, obj_sha): """Find a directory by its obj_sha. """ return find_object(db_conn, obj_sha, Type.directory) def find_content(db_conn, obj_sha): """Find a content by its obj_sha. """ return find_object(db_conn, obj_sha, Type.content) def find_occurrences_for_revision(db_conn, revision, type): """Find all occurences for a specific revisions. type is not used (implementation detail). """ return db.query_fetch(db_conn, ("""SELECT * FROM occurrence WHERE revision=%s""", (revision,))) def find_origin(db_conn, origin_url, origin_type): """Find all origins matching an url and an origin type. """ return db.query_fetchone(db_conn, ("""SELECT * FROM origin WHERE url=%s AND type=%s""", (origin_url, origin_type))) def find_person(db_conn, email, name): """Find a person uniquely identified by email and name. """ return db.query_fetchone(db_conn, ("""SELECT id FROM person WHERE email=%s AND name=%s""", (email, name))) def find_occurrence(cur, reference, revision, url_origin): """Find an ocurrence with reference pointing on valid revision for date. """ return db.fetchone( cur, ("""SELECT * FROM occurrence oc WHERE reference=%s AND revision=%s AND origin = (select id from origin where url = %s)""", (reference, revision, url_origin))) def find_object(db_conn, obj_sha, obj_type): """Find an object of obj_type by its obj_sha. 
""" table = obj_type if isinstance(obj_type, str) else obj_type.value query = 'select id from ' + table + ' where id=%s' return db.query_fetchone(db_conn, (query, (obj_sha,))) def filter_unknown_objects(db_conn, file_sha1s, table_to_filter, tbl_tmp_name): """Given a list of sha1s, filter the unknown object between this list and the content of the table table_to_filter. tbl_tmp_name is the temporary table used to filter. """ with db_conn.cursor() as cur: # explicit is better than implicit # simply creating the temporary table seems to be enough db.execute(cur, """CREATE TEMPORARY TABLE IF NOT EXISTS %s( id git_object_id) ON COMMIT DELETE ROWS;""" % tbl_tmp_name) db.copy_from(cur, file_sha1s, tbl_tmp_name) db.execute(cur, '(SELECT id FROM %s) EXCEPT (SELECT id FROM %s);' % (tbl_tmp_name, table_to_filter)) return cur.fetchall() def find_unknown_revisions(db_conn, file_sha1s): """Filter unknown revisions from file_sha1s. """ return filter_unknown_objects(db_conn, file_sha1s, 'revision', 'filter_sha1_revision') def find_unknown_directories(db_conn, file_sha1s): """Filter unknown directories from file_sha1s. """ return filter_unknown_objects(db_conn, file_sha1s, 'directory', 'filter_sha1_directory') def find_unknown_contents(db_conn, file_sha1s): """Filter unknown contents from file_sha1s. """ return filter_unknown_objects(db_conn, file_sha1s, 'content', 'filter_sha1_content') def _count_objects(db_conn, type): return db.query_fetchone(db_conn, 'SELECT count(*) FROM ' + type.value)[0] def count_revisions(db_conn): """Count the number of revisions. """ return _count_objects(db_conn, Type.revision) def count_directories(db_conn): """Count the number of directories. """ return _count_objects(db_conn, Type.directory) def count_contents(db_conn): """Count the number of contents. """ return _count_objects(db_conn, Type.content) def count_occurrence(db_conn): """Count the number of occurrence. 
""" return _count_objects(db_conn, Type.occurrence) def count_release(db_conn): """Count the number of occurrence. """ return _count_objects(db_conn, Type.release) def count_person(db_conn): """Count the number of occurrence. """ return _count_objects(db_conn, Type.person) diff --git a/swh/store/service.py b/swh/loader/git/store/service.py similarity index 98% rename from swh/store/service.py rename to swh/loader/git/store/service.py index 55e0294..d9a592d 100644 --- a/swh/store/service.py +++ b/swh/loader/git/store/service.py @@ -1,96 +1,96 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.store import store +from . import store filter_unknowns_type = store.find_unknowns def find_origin(db_conn, origin): """Find origin. """ origin_found = store.find_origin(db_conn, origin) return None if not origin_found else {'id': origin_found[0]} def find_person(db_conn, person): """Find person. """ person_found = store.find_person(db_conn, person) return None if not person_found else {'id': person_found[0]} def add_origin(db_conn, origin): """Add origin if not already existing. """ origin_found = store.find_origin(db_conn, origin) id = origin_found[0] if origin_found else store.add_origin(db_conn, origin) return {'id': id} def add_revisions(db_conn, conf, obj_type, objs): """Add Revisions. """ couple_parents = [] for obj in objs: # iterate over objects of type uri_type obj_id = obj['id'] obj_found = store.find(db_conn, obj_id, obj_type) if not obj_found: store.add(db_conn, conf, obj_id, obj_type, obj) # deal with revision history par_shas = obj.get('parent-sha1s', None) if par_shas: couple_parents.extend([(obj_id, p) for p in par_shas]) store.add_revision_history(db_conn, couple_parents) return True def add_persons(db_conn, conf, obj_type, objs): """Add persons. 
conf, obj_type are not used (implementation detail.) """ for obj in objs: obj_found = store.find_person(db_conn, obj) if not obj_found: store.add_person(db_conn, obj) return True # dispatch map to add in storage with fs or not _add_fn = {store.Type.content: store.add_with_fs_storage} def add_objects(db_conn, conf, obj_type, objs): """Add objects if not already present in the storage. """ add_fn = _add_fn.get(obj_type, store.add) res = [] for obj in objs: # iterate over objects of type uri_type obj_id = obj['id'] obj_found = store.find(db_conn, obj_id, obj_type) if not obj_found: obj = add_fn(db_conn, conf, obj_id, obj_type, obj) res.append(obj) else: res.append(obj_found) return res _persist_fn = {store.Type.person: add_persons, store.Type.revision: add_revisions} def persist(db_conn, conf, obj_type, objs): """Generic call to persist persons, revisions or other objects. """ persist_fn = _persist_fn.get(obj_type, add_objects) return persist_fn(db_conn, conf, obj_type, objs) diff --git a/swh/store/store.py b/swh/loader/git/store/store.py similarity index 99% rename from swh/store/store.py rename to swh/loader/git/store/store.py index 51afaed..b9638e5 100755 --- a/swh/store/store.py +++ b/swh/loader/git/store/store.py @@ -1,198 +1,198 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import StringIO -from swh.store import models +from . import models Type = models.Type _find_object = {Type.occurrence: models.find_occurrences_for_revision} def find(db_conn, id, type): """Find an object according to its sha1hex and type. 
""" find_fn = _find_object.get(type, models.find_object) return find_fn(db_conn, id, type) _find_unknown = {Type.revision: models.find_unknown_revisions, Type.content: models.find_unknown_contents, Type.directory: models.find_unknown_directories} def find_unknowns(db_conn, obj_type, sha1s_hex): """Given a list of sha1s, return the non presents one in storage. """ def row_to_sha1(row): """Convert a row (memoryview) to a string sha1. """ return row[0] vals = '\n'.join(sha1s_hex) cpy_data_buffer = StringIO() cpy_data_buffer.write(vals) cpy_data_buffer.seek(0) # move file cursor back at start of file find_unknown_fn = _find_unknown[obj_type] unknowns = find_unknown_fn(db_conn, cpy_data_buffer) cpy_data_buffer.close() return list(map(row_to_sha1, unknowns)) def _add_content(db_conn, vcs_object, sha1hex): """Add a blob to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_content(db_conn, sha1hex, vcs_object['content-sha1'], vcs_object['content-sha256'], vcs_object['size']) return sha1hex def _add_directory(db_conn, vcs_object, sha1hex): """Add a directory to storage. Designed to be wrapped in a db transaction. """ models.add_directory(db_conn, sha1hex) for directory_entry in vcs_object['entries']: _add_directory_entry(db_conn, directory_entry) return sha1hex def _add_directory_entry(db_conn, vcs_object): """Add a directory to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. 
""" name = vcs_object['name'] parent = vcs_object['parent'] models.add_directory_entry(db_conn, name, vcs_object['target-sha1'], vcs_object['nature'], vcs_object['perms'], vcs_object['atime'], vcs_object['mtime'], vcs_object['ctime'], parent) return name, parent def _add_revision(db_conn, vcs_object, sha1hex): """Add a revision to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_revision(db_conn, sha1hex, vcs_object['date'], vcs_object['directory'], vcs_object['message'], vcs_object['author'], vcs_object['committer'], vcs_object['parent-sha1s']) return sha1hex def _add_release(db_conn, vcs_object, sha1hex): """Add a release. """ models.add_release(db_conn, sha1hex, vcs_object['revision'], vcs_object['date'], vcs_object['name'], vcs_object['comment'], vcs_object['author']) return sha1hex def _add_occurrence(db_conn, vcs_object, sha1hex): """Add an occurrence. """ models.add_occurrence(db_conn, vcs_object['url-origin'], vcs_object['reference'], vcs_object['revision']) return sha1hex def add_person(db_conn, vcs_object): """Add an author. """ return models.add_person(db_conn, vcs_object['name'], vcs_object['email']) _store_fn = {Type.directory: _add_directory, Type.revision: _add_revision, Type.release: _add_release, Type.occurrence: _add_occurrence} def add_origin(db_conn, origin): """A a new origin and returns its id. """ return models.add_origin(db_conn, origin['url'], origin['type']) def find_origin(db_conn, origin): """Find an existing origin. """ return models.find_origin(db_conn, origin['url'], origin['type']) def find_person(db_conn, person): """Find an existing person. 
""" return models.find_person(db_conn, person['email'], person['name']) def add_with_fs_storage(db_conn, config, id, type, vcs_object): """Add vcs_object in the storage - db_conn is the opened connection to the db - config is the map of configuration needed for core layer - type is not used here but represent the type of vcs_object - vcs_object is the object meant to be persisted in fs and db """ config['objstorage'].add_bytes(vcs_object['content'], id) # FIXME use this id return _add_content(db_conn, vcs_object, id) def add(db_conn, config, id, type, vcs_object): """Given a sha1hex, type and content, store a given object in the store. - db_conn is the opened connection to the db - config is not used here - type is the object's type - vcs_object is the object meant to be persisted in db """ return _store_fn[type](db_conn, vcs_object, id) def add_revision_history(db_conn, couple_parents): """Given a list of tuple (sha, parent_sha), store in revision_history. """ if len(couple_parents) > 0: models.add_revision_history(db_conn, couple_parents) diff --git a/swh/tests/test_api_content.py b/swh/loader/git/tests/test_api_content.py similarity index 97% rename from swh/tests/test_api_content.py rename to swh/loader/git/tests/test_api_content.py index 2fffb27..0582225 100644 --- a/swh/tests/test_api_content.py +++ b/swh/loader/git/tests/test_api_content.py @@ -1,110 +1,110 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr -from swh.store import db, models -from swh.protocols import serial +from swh.loader.git.store import db, models +from swh.loader.git.protocols import serial from test_utils import app_client, app_client_teardown @attr('slow') class ContentTestCase(unittest.TestCase): def 
setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.content_sha1_id = '222222f9dd5dc46ee476a8be155ab049994f717e' content_sha1_id = 'blabliblablo' self.content_sha256_hex = '222222f9dd5dc46ee476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, content_sha1_id, self.content_sha256_hex, 10) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_content_ok(self): # when rv = self.app.get('/vcs/contents/%s' % self.content_sha1_id) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == '222222f9dd5dc46ee476a8be155ab049994f717e' @istest def get_content_not_found(self): # when rv = self.app.get('/vcs/contents/222222f9dd5dc46ee476a8be155ab049994f7170') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_content_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/contents/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_content_create_and_update(self): content_sha1 = 'sha1-contentc46ee476a8be155ab03333333333' # does not exist rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 404 assert rv.data == b'Not found!' 
# we create it body = {'id': content_sha1, 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', 'content': b'bar', 'size': '3'} rv = self.app.put('/vcs/contents/%s' % content_sha1, data=serial.dumps(body), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' # # we update it body = {'id': content_sha1, 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', 'content': b'bar', 'size': '3'} rv = self.app.put('/vcs/contents/%s' % content_sha1, data=serial.dumps(body), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # still the same rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' diff --git a/swh/tests/test_api_directory.py b/swh/loader/git/tests/test_api_directory.py similarity index 98% rename from swh/tests/test_api_directory.py rename to swh/loader/git/tests/test_api_directory.py index f73b0c1..987090d 100644 --- a/swh/tests/test_api_directory.py +++ b/swh/loader/git/tests/test_api_directory.py @@ -1,119 +1,119 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr -from swh.store import db, models -from swh.protocols import serial +from swh.loader.git.store import db, models +from swh.loader.git.protocols import serial from test_utils import now, app_client, 
app_client_teardown @attr('slow') class DirectoryTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.content_sha1_id = 'content-sha1c46ee476a8be155ab049994f717e' content_sha1_hex = 'content-sha1c46ee476a8be155ab049994f717e' content_sha256_hex = 'content-sha2566ee476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, content_sha1_hex, content_sha256_hex, 10) self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_directory_ok(self): # when rv = self.app.get('/vcs/directories/%s' % self.directory_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f717e' @istest def get_directory_not_found(self): # when rv = self.app.get('/vcs/directories/111111f9dd5dc46ee476a8be155ab049994f7170') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_directory_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/directories/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_directory_create_and_update(self): directory_sha1='directory-sha16ee476a8be155ab049994f7170' # does not exist rv = self.app.get('/vcs/directories/%s' % directory_sha1) # then assert rv.status_code == 404 assert rv.data == b'Not found!' 
# we create it body = serial.dumps({'entries': [{'name': 'filename', 'target-sha1': self.content_sha1_id, 'nature': 'file', 'perms': '000', 'atime': now(), 'mtime': now(), 'ctime': now(), 'parent': directory_sha1}, {'name': 'dirname', 'target-sha1': self.directory_sha1_hex, 'nature': 'directory', 'perms': '012', 'atime': now(), 'mtime': now(), 'ctime': now(), 'parent': directory_sha1} ]}) rv = self.app.put('/vcs/directories/%s' % directory_sha1, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/directories/%s' % directory_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' # we update it rv = self.app.put('/vcs/directories/directory-sha16ee476a8be155ab049994f7170', data=serial.dumps({'entry': 'directory-bar'}), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # still the same rv = self.app.get('/vcs/directories/directory-sha16ee476a8be155ab049994f7170') # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' diff --git a/swh/tests/test_api_home.py b/swh/loader/git/tests/test_api_home.py similarity index 100% rename from swh/tests/test_api_home.py rename to swh/loader/git/tests/test_api_home.py diff --git a/swh/tests/test_api_occurrence.py b/swh/loader/git/tests/test_api_occurrence.py similarity index 98% rename from swh/tests/test_api_occurrence.py rename to swh/loader/git/tests/test_api_occurrence.py index d9b18b0..334272f 100644 --- a/swh/tests/test_api_occurrence.py +++ b/swh/loader/git/tests/test_api_occurrence.py @@ -1,131 +1,131 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from 
@attr('slow')
class OccurrenceTestCase(unittest.TestCase):
    """Tests for GET and PUT on /vcs/occurrences/<revision-sha1>."""

    def setUp(self):
        """Insert a directory, a person, two revisions, an origin and two
        occurrences (both on the first revision) into the test database."""
        self.app, url, self.content_storage_dir = app_client()

        with db.connect(url) as conn:
            self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e'
            models.add_directory(conn, self.directory_sha1_hex)

            # The same person dict is used as both author and committer.
            person = {'name': 'some-name', 'email': 'some-email'}
            models.add_person(conn, person['name'], person['email'])

            self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e'
            models.add_revision(conn, self.revision_sha1_hex, now(),
                                self.directory_sha1_hex, "revision message",
                                person, person)

            self.origin_url = "https://github.com/user/repo"
            models.add_origin(conn, self.origin_url, 'git')

            self.reference_name = 'master'
            models.add_occurrence(conn, self.origin_url,
                                  self.reference_name, self.revision_sha1_hex)

            self.reference_name2 = 'master2'
            models.add_occurrence(conn, self.origin_url,
                                  self.reference_name2, self.revision_sha1_hex)

            # Second revision: inserted without any occurrence attached.
            self.revision_sha1_hex_2 = '2-revision-sha1-to-test-existence9994f71'
            models.add_revision(conn, self.revision_sha1_hex_2, now(),
                                self.directory_sha1_hex, "revision message 2",
                                person, person)

    def tearDown(self):
        """Hand the content storage directory back to the shared teardown."""
        app_client_teardown(self.content_storage_dir)

    @istest
    def get_occurrence_ok(self):
        """A revision with occurrences answers 200 with both reference names."""
        url = '/vcs/occurrences/%s' % self.revision_sha1_hex

        response = self.app.get(url)

        expected = [self.reference_name, self.reference_name2]
        assert response.status_code == 200
        assert serial.loads(response.data) == expected

    @istest
    def get_occurrence_not_found(self):
        """An unknown sha1 answers 404."""
        response = self.app.get('/vcs/occurrences/inexistant-sha1')

        assert response.status_code == 404
        assert response.data == b'Not found!'

    @istest
    def get_occurrence_not_found_with_bad_format(self):
        """A badly formatted identifier also answers 404."""
        response = self.app.get('/vcs/occurrences/1')

        assert response.status_code == 404
        assert response.data == b'Not found!'

    @istest
    def put_occurrence_create_and_update(self):
        """PUT creates the occurrence; a second identical PUT leaves it as is."""
        occ_revision_sha1_hex = self.revision_sha1_hex_2

        # Nothing is attached to the second revision yet.
        response = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex)
        assert response.status_code == 404
        assert response.data == b'Not found!'

        # we create it
        # FIXME: 'revision' is redundant with the one from uri (see the put below)
        occurrence = {'revision': occ_revision_sha1_hex,
                      'reference': 'master',
                      'url-origin': self.origin_url}
        body = serial.dumps(occurrence)
        response = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex,
                                data=body,
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 204
        assert response.data == b''

        # now it exists
        response = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex)
        assert response.status_code == 200
        assert serial.loads(response.data) == ['master']

        # we update it
        response = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex,
                                data=body,
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 204
        assert response.data == b''

        # still the same
        response = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex)
        assert response.status_code == 200
        assert serial.loads(response.data) == ['master']
@attr('slow')
class OriginTestCase(unittest.TestCase):
    """Tests for POST (lookup) and PUT (store) on /origins/."""

    def setUp(self):
        """Insert one git origin and remember the id returned by the model."""
        self.app, url, _ = app_client()

        with db.connect(url) as conn:
            self.origin_url = 'https://github.com/torvalds/linux.git'
            self.origin_type = 'git'
            self.origin_id = models.add_origin(conn,
                                               self.origin_url,
                                               self.origin_type)

    @istest
    def get_origin_ok(self):
        """Looking up the seeded origin answers 200 with its id."""
        lookup = {'url': self.origin_url,
                  'type': self.origin_type}

        response = self.app.post('/origins/',
                                 data=serial.dumps(lookup),
                                 headers={'Content-Type': serial.MIMETYPE})

        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == self.origin_id

    @istest
    def get_origin_not_found(self):
        """Looking up an origin that was never stored answers 404."""
        lookup = {'url': 'unknown',
                  'type': 'blah'}

        response = self.app.post('/origins/',
                                 data=serial.dumps(lookup),
                                 headers={'Content-Type': serial.MIMETYPE})

        assert response.status_code == 404
        assert response.data == b'Origin not found!'

    @istest
    def get_origin_not_found_with_bad_format(self):
        """A lookup payload without a 'type' key answers 400."""
        response = self.app.post('/origins/',
                                 data=serial.dumps({'url': 'unknown'}),
                                 headers={'Content-Type': serial.MIMETYPE})

        assert response.status_code == 400

    @istest
    def put_origin(self):
        """PUT stores an unknown origin; a second PUT returns the same id."""
        # The origin is not there to begin with.
        origin = {'url': 'unknown',
                  'type': 'blah'}
        response = self.app.post('/origins/',
                                 data=serial.dumps(origin),
                                 headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 404
        assert response.data == b'Origin not found!'

        # First PUT stores it.
        response = self.app.put('/origins/',
                                data=serial.dumps(origin),
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 200  # FIXME: 201
        assert serial.loads(response.data)['id']

        # The lookup now succeeds and yields a truthy id.
        origin = {'url': 'unknown',
                  'type': 'blah'}
        response = self.app.post('/origins/',
                                 data=serial.dumps(origin),
                                 headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 200
        origin_id = serial.loads(response.data)['id']
        assert origin_id

        # Second PUT with the same payload returns that same id.
        response = self.app.put('/origins/',
                                data=serial.dumps(origin),
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 200  # FIXME: 204
        assert serial.loads(response.data)['id'] == origin_id
rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.person_id @istest def get_person_not_found(self): # when person = {'name': 'unknown', 'email': 'blah'} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 404 assert rv.data == b'Person not found!' @istest def get_person_not_found_with_bad_format(self): # when rv = self.app.post('/vcs/persons/', data=serial.dumps({'name': 'unknown'}), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 @istest def put_person(self): # when person = {'name': 'unknown', 'email': 'blah'} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 404 assert rv.data == b'Person not found!' # when rv = self.app.put('/vcs/persons/', data=serial.dumps([person]), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 204 assert rv.data == b'' person = {'name': 'unknown', 'email': 'blah'} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 person_id = serial.loads(rv.data)['id'] assert person_id # when rv = self.app.put('/vcs/persons/', data=serial.dumps([person, person]), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 204 assert rv.data == b'' diff --git a/swh/tests/test_api_post_per_type.py b/swh/loader/git/tests/test_api_post_per_type.py similarity index 98% rename from swh/tests/test_api_post_per_type.py rename to swh/loader/git/tests/test_api_post_per_type.py index 25f6ab4..8212252 100644 --- a/swh/tests/test_api_post_per_type.py +++ b/swh/loader/git/tests/test_api_post_per_type.py @@ -1,213 +1,213 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See 
@attr('slow')
class TestPostObjectsPerTypeCase(unittest.TestCase):
    """Tests for POST on /vcs/<type>/ with a list of sha1s.

    For contents, directories and revisions the tests expect the response to
    list the posted sha1s that setUp did not insert. For releases and
    occurrences they expect a 400.
    """

    def setUp(self):
        """Insert one content, one directory, two persons, two revisions,
        one release, one origin and one occurrence into the test database."""
        self.app, self.db_url, self.content_storage_dir = app_client()

        with db.connect(self.db_url) as db_conn:
            self.content_sha1_id = 'sha1-content0-6ee476a8be155ab049994f717e'
            self.content_sha256_hex = 'sha256-content0-e476a8be155ab049994f717e'
            models.add_content(db_conn,
                               self.content_sha1_id,
                               self.content_sha1_id,
                               self.content_sha256_hex,
                               10)

            self.directory_sha1_hex = 'directory-sha1-ee476a8be155ab049994f717e'
            models.add_directory(db_conn, self.directory_sha1_hex)

            authorAndCommitter = {'name': 'some-name', 'email': 'some-email'}
            models.add_person(db_conn,
                              authorAndCommitter['name'],
                              authorAndCommitter['email'])

            authorAndCommitter2 = {'name': 'tony', 'email': 'tony@dude.org'}
            models.add_person(db_conn,
                              authorAndCommitter2['name'],
                              authorAndCommitter2['email'])

            self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e'
            models.add_revision(db_conn,
                                self.revision_sha1_hex,
                                now(),
                                self.directory_sha1_hex,
                                "revision message",
                                authorAndCommitter,
                                authorAndCommitter)

            # Second revision, whose parent is the first one.
            self.revision_sha1_hex2 = 'revision-sha1-2-for-testing-put-occurr'
            models.add_revision(db_conn,
                                self.revision_sha1_hex2,
                                now(),
                                self.directory_sha1_hex,
                                "revision message",
                                authorAndCommitter2,
                                authorAndCommitter2,
                                parent_shas=['revision-sha1-to-test-existence9994f717e'])

            self.release_sha1_hex = 'release-sha1-to-test-existence1234567901'
            models.add_release(db_conn,
                               self.release_sha1_hex,
                               self.revision_sha1_hex,
                               now(),
                               "0.0.1",
                               "Super release tagged by tony",
                               authorAndCommitter2)

            self.origin_url = "https://github.com/user/repo"
            models.add_origin(db_conn, self.origin_url, 'git')

            models.add_occurrence(db_conn,
                                  self.origin_url,
                                  'master',
                                  self.revision_sha1_hex)

    def tearDown(self):
        """Hand the content storage directory back to the shared teardown."""
        app_client_teardown(self.content_storage_dir)

    @istest
    def post_all_non_presents_contents(self):
        """Posting one stored and three other content sha1s (one duplicated)
        must return exactly the two distinct sha1s that were not stored."""
        # given

        # when
        payload = [self.content_sha1_id,
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '666777f9dd5dc46ee476a8be155ab049994f717e']
        query_payload = serial.dumps(payload)

        rv = self.app.post('/vcs/contents/',
                           data=query_payload,
                           headers={'Content-Type': serial.MIMETYPE})

        # then
        assert rv.status_code == 200

        sha1s = serial.loads(rv.data)
        # Compare by value: `is` tests object identity, not equality.
        assert len(sha1s) == 2  # only 2 sha1s
        assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s
        assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s

    @istest
    def post_all_non_presents_directories(self):
        """Same check as for contents, against /vcs/directories/."""
        # given

        # when
        payload = [self.directory_sha1_hex,
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '666777f9dd5dc46ee476a8be155ab049994f717e']
        query_payload = serial.dumps(payload)

        rv = self.app.post('/vcs/directories/',
                           data=query_payload,
                           headers={'Content-Type': serial.MIMETYPE})

        # then
        assert rv.status_code == 200

        sha1s = serial.loads(rv.data)
        # Compare by value: `is` tests object identity, not equality.
        assert len(sha1s) == 2  # only 2 sha1s
        assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s
        assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s

    @istest
    def post_all_non_presents_revisions(self):
        """Same check against /vcs/revisions/, with the stored revision
        listed twice in the payload."""
        # given

        # when
        payload = [self.revision_sha1_hex,
                   self.revision_sha1_hex,
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '666777f9dd5dc46ee476a8be155ab049994f717e']
        query_payload = serial.dumps(payload)

        rv = self.app.post('/vcs/revisions/',
                           data=query_payload,
                           headers={'Content-Type': serial.MIMETYPE})

        # then
        assert rv.status_code == 200

        sha1s = serial.loads(rv.data)
        # Compare by value: `is` tests object identity, not equality.
        assert len(sha1s) == 2  # only 2 sha1s
        assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s
        assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s

    @istest
    def post_all_non_presents_releases(self):
        """Posting a sha1 list to /vcs/releases/ must answer 400."""
        # given

        # when
        payload = [self.release_sha1_hex,
                   self.release_sha1_hex,
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '666777f9dd5dc46ee476a8be155ab049994f717e']
        query_payload = serial.dumps(payload)

        rv = self.app.post('/vcs/releases/',
                           data=query_payload,
                           headers={'Content-Type': serial.MIMETYPE})

        # then
        assert rv.status_code == 400
        assert rv.data == b'Bad request. Type not supported!'

    @istest
    def post_all_non_presents_occurrences_KO(self):
        """Posting a sha1 list to /vcs/occurrences/ must answer 400."""
        # given

        # when
        payload = [self.revision_sha1_hex,
                   self.revision_sha1_hex,
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '555444f9dd5dc46ee476a8be155ab049994f717e',
                   '666777f9dd5dc46ee476a8be155ab049994f717e']
        query_payload = serial.dumps(payload)

        rv = self.app.post('/vcs/occurrences/',
                           data=query_payload,
                           headers={'Content-Type': serial.MIMETYPE})

        # then
        assert rv.status_code == 400
        assert rv.data == b'Bad request. Type not supported!'

    @istest
    def post_non_presents_objects_empty_payload_so_empty_results(self):
        """An empty payload must answer 200 with an empty list for each of
        contents, directories and revisions."""
        # given

        # when
        for api_type in ['contents', 'directories', 'revisions']:
            rv = self.app.post('/vcs/%s/' % api_type,
                               data=serial.dumps({}),
                               headers={'Content-Type': serial.MIMETYPE})

            # then
            assert rv.status_code == 200
            assert serial.loads(rv.data) == []

    @istest
    def post_non_presents_objects_bad_requests_format_pickle(self):
        """A plain-string body sent without the Content-Type header must
        answer 400 for each of contents, directories and revisions."""
        # given

        # when
        for api_type in ['contents', 'directories', 'revisions']:
            rv = self.app.post('/vcs/%s/' % api_type,
                               data="not pickle -> fail")

            # then
            assert rv.status_code == 400
            assert rv.data == b'Bad request. Expected application/octet-stream data!'
@attr('slow')
class ReleaseTestCase(unittest.TestCase):
    """Tests for GET and PUT on /vcs/releases/<release-sha1>."""

    def setUp(self):
        """Insert a directory, the tag author, a revision and a release
        pointing at that revision into the test database."""
        self.app, url, self.content_storage_dir = app_client()

        with db.connect(url) as conn:
            self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e'
            models.add_directory(conn, self.directory_sha1_hex)

            self.tagAuthor = {'name': 'tony',
                              'email': 'tony@mail.org'}
            models.add_person(conn,
                              self.tagAuthor['name'],
                              self.tagAuthor['email'])

            # The tag author is also the revision's author and committer.
            self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e'
            models.add_revision(conn, self.revision_sha1_hex, now(),
                                self.directory_sha1_hex, "revision message",
                                self.tagAuthor, self.tagAuthor)

            self.release_sha1_hex = 'release-sha1-to-test-existence1234567901'
            models.add_release(conn, self.release_sha1_hex,
                               self.revision_sha1_hex, now(), "0.0.1",
                               "Super release tagged by tony", self.tagAuthor)

    def tearDown(self):
        """Hand the content storage directory back to the shared teardown."""
        app_client_teardown(self.content_storage_dir)

    @istest
    def get_release_ok(self):
        """The seeded release answers 200 and echoes its id."""
        response = self.app.get('/vcs/releases/%s' % self.release_sha1_hex)

        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == self.release_sha1_hex

    @istest
    def get_release_not_found(self):
        """An unknown sha1 answers 404."""
        response = self.app.get('/vcs/releases/inexistant-sha1')

        assert response.status_code == 404
        assert response.data == b'Not found!'

    @istest
    def get_release_not_found_with_bad_format(self):
        """A badly formatted identifier also answers 404."""
        response = self.app.get('/vcs/releases/1')

        assert response.status_code == 404
        assert response.data == b'Not found!'

    @istest
    def put_release_create_and_update(self):
        """PUT creates the release; a second identical PUT leaves it readable
        under the same id."""
        release_sha1_hex = 'sha1-release46ee476a8be155ab049994f717e'

        # Not stored yet.
        response = self.app.get('/vcs/releases/%s' % release_sha1_hex)
        assert response.status_code == 404
        assert response.data == b'Not found!'

        # we create it
        release = {'id': release_sha1_hex,
                   'revision': self.revision_sha1_hex,
                   'date': now(),
                   'name': '0.0.1',
                   'comment': 'super release tagged by ardumont',
                   'author': self.tagAuthor}
        body = serial.dumps(release)
        response = self.app.put('/vcs/releases/%s' % release_sha1_hex,
                                data=body,
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 204
        assert response.data == b''

        # now it exists
        response = self.app.get('/vcs/releases/%s' % release_sha1_hex)
        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == release_sha1_hex

        # we update it
        response = self.app.put('/vcs/releases/%s' % release_sha1_hex,
                                data=body,
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 204
        assert response.data == b''

        # still the same
        response = self.app.get('/vcs/releases/%s' % release_sha1_hex)
        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == release_sha1_hex
@attr('slow')
class RevisionTestCase(unittest.TestCase):
    """Tests for GET and PUT on /vcs/revisions/<revision-sha1>."""

    def setUp(self):
        """Insert a directory, a person and one revision into the test
        database."""
        self.app, url, self.content_storage_dir = app_client()

        with db.connect(url) as conn:
            self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e'
            models.add_directory(conn, self.directory_sha1_hex)

            self.authorAndCommitter = {'name': 'some-name',
                                       'email': 'some-email'}
            models.add_person(conn,
                              self.authorAndCommitter['name'],
                              self.authorAndCommitter['email'])

            # The same person is used as both author and committer.
            self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e'
            models.add_revision(conn, self.revision_sha1_hex, now(),
                                self.directory_sha1_hex, "revision message",
                                self.authorAndCommitter,
                                self.authorAndCommitter)

    def tearDown(self):
        """Hand the content storage directory back to the shared teardown."""
        app_client_teardown(self.content_storage_dir)

    @istest
    def get_revision_ok(self):
        """The seeded revision answers 200 and echoes its id."""
        response = self.app.get('/vcs/revisions/%s' % self.revision_sha1_hex)

        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == self.revision_sha1_hex

    @istest
    def get_revision_not_found(self):
        """An unknown sha1 answers 404."""
        response = self.app.get('/vcs/revisions/inexistant-sha1')

        assert response.status_code == 404
        assert response.data == b'Not found!'

    @istest
    def get_revision_not_found_with_bad_format(self):
        """A badly formatted identifier also answers 404."""
        response = self.app.get('/vcs/revisions/1')

        assert response.status_code == 404
        assert response.data == b'Not found!'

    @istest
    def put_revision_create_and_update(self):
        """PUT creates the revision; a second identical PUT leaves it readable
        under the same id."""
        revision_sha1_hex = 'sha1-revision46ee476a8be155ab049994f717e'

        # Not stored yet.
        response = self.app.get('/vcs/revisions/%s' % revision_sha1_hex)
        assert response.status_code == 404
        assert response.data == b'Not found!'

        # we create it, with the seeded revision as its only parent
        revision = {'date': now(),
                    'directory': self.directory_sha1_hex,
                    'message': 'revision message describing it',
                    'committer': self.authorAndCommitter,
                    'author': self.authorAndCommitter,
                    'parent-sha1s': [self.revision_sha1_hex]}
        body = serial.dumps(revision)
        response = self.app.put('/vcs/revisions/%s' % revision_sha1_hex,
                                data=body,
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 204
        assert response.data == b''

        # now it exists
        response = self.app.get('/vcs/revisions/%s' % revision_sha1_hex)
        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == revision_sha1_hex

        # we update it
        response = self.app.put('/vcs/revisions/%s' % revision_sha1_hex,
                                data=body,
                                headers={'Content-Type': serial.MIMETYPE})
        assert response.status_code == 204
        assert response.data == b''

        # still the same
        response = self.app.get('/vcs/revisions/%s' % revision_sha1_hex)
        assert response.status_code == 200
        assert serial.loads(response.data)['id'] == revision_sha1_hex
@istest def url_lookup_per_type(self): # then assert http.url_lookup_per_type == { store.Type.origin: "/origins/" , store.Type.content: "/vcs/contents/" , store.Type.directory: "/vcs/directories/" , store.Type.revision: "/vcs/revisions/" } @istest def url_store_per_type(self): # then assert http.url_store_per_type == { store.Type.origin: "/origins/" , store.Type.content: "/vcs/contents/" , store.Type.directory: "/vcs/directories/" , store.Type.revision: "/vcs/revisions/" , store.Type.release: "/vcs/releases/" , store.Type.occurrence: "/vcs/occurrences/" , store.Type.person: "/vcs/persons/" } diff --git a/swh/tests/test_initdb.py b/swh/loader/git/tests/test_initdb.py similarity index 89% rename from swh/tests/test_initdb.py rename to swh/loader/git/tests/test_initdb.py index e3f20b9..eb30c2a 100644 --- a/swh/tests/test_initdb.py +++ b/swh/loader/git/tests/test_initdb.py @@ -1,13 +1,13 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.manager import manage +from swh.loader.git.manager import manage def prepare_db(db_url): """DB fresh start. 
""" manage('cleandb', db_url) manage('initdb', db_url) diff --git a/swh/tests/test_local_loader.py b/swh/loader/git/tests/test_local_loader.py similarity index 98% rename from swh/tests/test_local_loader.py rename to swh/loader/git/tests/test_local_loader.py index ac9a741..4daeb0c 100644 --- a/swh/tests/test_local_loader.py +++ b/swh/loader/git/tests/test_local_loader.py @@ -1,249 +1,249 @@ # coding: utf-8 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pygit2 import tempfile import shutil import os from nose.plugins.attrib import attr from nose.tools import istest -from swh.store import db, models -from swh.gitloader import loader -from swh.conf import reader +from swh.loader.git.store import db, models +from swh.loader.git.gitloader import loader +from swh.loader.git.conf import reader import test_initdb from test_utils import list_files_from from test_git_utils import create_commit_with_content, create_tag @attr('slow') class TestLocalLoader(unittest.TestCase): def setUp(self): """Initialize a git repository for the remaining test to manipulate. """ tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', dir='/tmp') self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) self.conf_back = reader.read('./resources/test/back.ini', {'port': ('int', 9999)}) self.db_url = self.conf_back['db_url'] self.conf = { 'action': 'load', 'repo_path': self.tmp_git_repo.workdir, 'backend-type': 'local', 'backend': './resources/test/back.ini' } def init_db_setup(self): """Initialize a git repository for the remaining test to manipulate. """ test_initdb.prepare_db(self.db_url) def tearDown(self): """Destroy the test git repository. 
""" shutil.rmtree(self.tmp_git_repo.workdir) shutil.rmtree(self.conf_back['content_storage_dir'], ignore_errors=True) @istest def should_fail_on_bad_action(self): # when try: loader.load({'action': 'unknown'}) except: pass @istest def should_fail_on_inexistant_folder(self): # when try: loader.load({'action': 'load', 'repo_path': 'something-that-definitely-does-not-exist'}) except: pass @istest def should_fail_on_inexistant_backend_type(self): # when try: loader.load({'action': 'load', 'repo_path': '.', 'backend-type': 'unknown'}) # only local or remote supported except: pass @istest def local_loader(self): """Trigger loader and make sure everything is ok. """ self.init_db_setup() # given commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', 'commit msg 0') commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', 'commit msg 1', [commit0.hex]) commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', 'commit msg 2', [commit1.hex]) commit3 = create_commit_with_content(self.tmp_git_repo, None, 'commit msg 3', [commit2.hex]) commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', 'commit msg 4', [commit3.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 4, "4 blobs.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 5, "Should be 5 commits") self.assertEquals( models.count_directories(db_conn), 5, "Should be 5 trees") self.assertEquals( models.count_contents(db_conn), 4, "Should be 4 blobs as we created one commit without data!") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 1, "Should be 1 reference (master) so 1 occurrence.") # given commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', 'commit msg 5', [commit4.hex]) commit6 = create_commit_with_content(self.tmp_git_repo, 'new blob and 
last 6', 'commit msg 6', [commit5.hex]) commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', 'commit msg 7', [commit6.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 4+3, "3 new blobs.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 8, "Should be 5+3 == 8 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 5+3 == 8 trees") self.assertEquals( models.count_contents(db_conn), 7, "Should be 4+3 == 7 blobs") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 2, "Should be 1 reference which changed twice so 2 occurrences (master changed).") # given create_commit_with_content(self.tmp_git_repo, None, 'commit 8 with parent 2', [commit7.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf_back['content_storage_dir'])) self.assertEquals(nb_files, 7, "no new blob.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 3, "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") self.assertEquals( models.count_person(db_conn), 2, "1 author + 1 committer") # add tag create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... 
@attr('slow')
class TestRemoteLoader(unittest.TestCase):
    """Runs loader.load with 'backend-type': 'remote' against a temporary git
    repository, then checks the content storage directory and the database
    row counts."""

    def setUp(self):
        """Create a temporary git repository and build the loader
        configuration from ./resources/test/back.ini."""
        tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.',
                                               dir='/tmp')
        self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path)
        self.conf = reader.read('./resources/test/back.ini',
                                {'port': ('int', 9999)})

        self.db_url = self.conf['db_url']
        self.conf.update({
            'action': 'load',
            'repo_path': self.tmp_git_repo.workdir,
            'backend-type': 'remote',
            'backend': 'http://localhost:%s' % self.conf['port']
        })

        # Not the remote loader in charge of creating the folder, so we do it.
        # exist_ok replaces the separate existence check and also creates any
        # missing parent directory.
        os.makedirs(self.conf['content_storage_dir'], exist_ok=True)

    def init_db_setup(self):
        """Prepare the test database by calling test_initdb.prepare_db on
        self.db_url."""
        test_initdb.prepare_db(self.db_url)

    def tearDown(self):
        """Remove the temporary git repository and the content storage
        directory."""
        shutil.rmtree(self.tmp_git_repo.workdir)
        shutil.rmtree(self.conf['content_storage_dir'])

    @istest
    def should_fail_on_bad_action(self):
        """Call the loader with an unknown action."""
        # NOTE(review): this passes whether or not loader.load raises; switch
        # to assertRaises once the expected exception type is confirmed.
        # when
        try:
            loader.load({'action': 'unknown'})
        except:
            pass

    @istest
    def should_fail_on_inexistant_folder(self):
        """Call the loader on a repository path that does not exist."""
        # NOTE(review): this passes whether or not loader.load raises; switch
        # to assertRaises once the expected exception type is confirmed.
        # when
        try:
            loader.load({'action': 'load',
                         'repo_path': 'something-that-definitely-does-not-exist'})
        except:
            pass

    @istest
    def should_fail_on_inexistant_backend_type(self):
        """Call the loader with an unsupported backend type."""
        # NOTE(review): this passes whether or not loader.load raises; switch
        # to assertRaises once the expected exception type is confirmed.
        # when
        try:
            loader.load({'action': 'load',
                         'repo_path': '.',
                         'backend-type': 'unknown'})  # only local or remote supported
        except:
            pass

    @istest
    def remote_loader(self):
        """Trigger loader and make sure everything is ok.

        Loads the repository four times: after five commits, after three
        more, after one commit without a blob, and after two tags. Each load
        is followed by checks on the stored file count and the database
        counts.
        """
        # given
        self.init_db_setup()

        # given
        commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0',
                                             'commit msg 0')
        commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1',
                                             'commit msg 1',
                                             [commit0.hex])
        commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2',
                                             'commit msg 2',
                                             [commit1.hex])
        commit3 = create_commit_with_content(self.tmp_git_repo, None,
                                             'commit msg 3',
                                             [commit2.hex])
        commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4',
                                             'commit msg 4',
                                             [commit3.hex])

        # when
        loader.load(self.conf)

        # then
        nb_files = len(list_files_from(self.conf['content_storage_dir']))
        self.assertEqual(nb_files, 4, "4 blobs")

        with db.connect(self.db_url) as db_conn:
            self.assertEqual(
                models.count_revisions(db_conn),
                5,
                "Should be 5 commits")
            self.assertEqual(
                models.count_directories(db_conn),
                5,
                "Should be 5 trees")
            self.assertEqual(
                models.count_contents(db_conn),
                4,
                "Should be 4 blobs as we created one commit without data!")
            self.assertEqual(
                models.count_release(db_conn),
                0,
                "No tag created so 0 release.")
            self.assertEqual(
                models.count_occurrence(db_conn),
                1,
                "Should be 1 reference (master) so 1 occurrence.")

        # given
        commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5',
                                             'commit msg 5',
                                             [commit4.hex])
        commit6 = create_commit_with_content(self.tmp_git_repo,
                                             'new blob and last 6',
                                             'commit msg 6',
                                             [commit5.hex])
        commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7',
                                             'commit msg 7',
                                             [commit6.hex])

        # when
        loader.load(self.conf)

        # then
        nb_files = len(list_files_from(self.conf['content_storage_dir']))
        self.assertEqual(nb_files, 4+3, "3 new blobs")

        with db.connect(self.db_url) as db_conn:
            self.assertEqual(
                models.count_revisions(db_conn),
                8,
                "Should be 5+3 == 8 commits now")
            self.assertEqual(
                models.count_directories(db_conn),
                8,
                "Should be 5+3 == 8 trees")
            self.assertEqual(
                models.count_contents(db_conn),
                7,
                "Should be 4+3 == 7 blobs")
            self.assertEqual(
                models.count_release(db_conn),
                0,
                "No tag created so 0 release.")
            self.assertEqual(
                models.count_occurrence(db_conn),
                2,
                "Should be 1 reference which changed twice so 2 occurrences (master changed).")

        # given
        create_commit_with_content(self.tmp_git_repo, None,
                                   'commit 8 with parent 2',
                                   [commit7.hex])

        # when
        loader.load(self.conf)

        # then
        nb_files = len(list_files_from(self.conf['content_storage_dir']))
        self.assertEqual(nb_files, 7, "no new blob")

        with db.connect(self.db_url) as db_conn:
            self.assertEqual(
                models.count_revisions(db_conn),
                9,
                "Should be 8+1 == 9 commits now")
            self.assertEqual(
                models.count_directories(db_conn),
                8,
                "Should be 8 trees (new commit without blob so no new tree)")
            self.assertEqual(
                models.count_contents(db_conn),
                7,
                "Should be 7 blobs (new commit without new blob)")
            self.assertEqual(
                models.count_release(db_conn),
                0,
                "No tag created so 0 release.")
            self.assertEqual(
                models.count_occurrence(db_conn),
                3,
                "Should be 1 reference which changed thrice so 3 occurrences (master changed again).")
            self.assertEqual(
                models.count_person(db_conn),
                2,
                "1 author + 1 committer")

        # add tag
        create_tag(self.tmp_git_repo, '0.0.1', commit5,
                   'bad ass release 0.0.1, towards infinity...')
        create_tag(self.tmp_git_repo, '0.0.2', commit7,
                   'release 0.0.2... and beyond')

        loader.load(self.conf)

        # then
        nb_files = len(list_files_from(self.conf['content_storage_dir']))
        self.assertEqual(nb_files, 7, "no new blob")

        with db.connect(self.db_url) as db_conn:
            self.assertEqual(
                models.count_revisions(db_conn),
                9,
                "Should be 8+1 == 9 commits now")
            self.assertEqual(
                models.count_directories(db_conn),
                8,
                "Should be 8 trees (new commit without blob so no new tree)")
            self.assertEqual(
                models.count_contents(db_conn),
                7,
                "Should be 7 blobs (new commit without new blob)")
            self.assertEqual(
                models.count_release(db_conn),
                2,
                "Should be 2 annotated tags so 2 releases")
            self.assertEqual(
                models.count_occurrence(db_conn),
                3,
                "master did not change this time so still 3 occurrences")
            self.assertEqual(
                models.count_person(db_conn),
                3,
                "1 author + 1 committer + 1 tagger")
'some-occ-sha1'}) r.add_release({'id': 'some-rel-sha1'}) # then assert r.get_origin() == {'url': 'foobar'} assert r.get_releases() == [{'id': 'some-rel-sha1'}] assert r.get_occurrences() == [{'id': 'some-occ-sha1'}] for sha in ['some-con-sha1', 'some-con-sha1-2', 'some-dir-sha1', 'some-dir-sha1-2', 'some-rev-sha1', 'some-rev-sha1-2']: assert r.already_visited(sha) is True assert r.already_visited('some-occ-sha1') is False assert r.already_visited('some-rel-sha1') is False assert r.get_contents() == {'some-con-sha1': {'id': 'some-con-sha1'}, 'some-con-sha1-2': {'id': 'some-con-sha1-2','stuff': 'some-stuff'}} assert r.get_directories() == {'some-dir-sha1': {'id': 'some-dir-sha1'}, 'some-dir-sha1-2': {'id': 'some-dir-sha1-2'}} assert r.get_revisions() == {'some-rev-sha1': {'id': 'some-rev-sha1'}, 'some-rev-sha1-2': {'id': 'some-rev-sha1-2'}} assert len(r.get_persons()) == 2 diff --git a/swh/tests/test_utils.py b/swh/loader/git/tests/test_utils.py similarity index 97% rename from swh/tests/test_utils.py rename to swh/loader/git/tests/test_utils.py index cf98d6c..f8edf1d 100644 --- a/swh/tests/test_utils.py +++ b/swh/loader/git/tests/test_utils.py @@ -1,59 +1,59 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import time import os import shutil import tempfile -from swh.backend import api +from swh.loader.git.backend import api from swh.storage.objstorage import ObjStorage import test_initdb def now(): """Build the date as of now in the api's format. """ return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) def list_files_from(root_path): """Compute the list of files from root_path. 
""" f = [] for (dirpath, dirnames, filenames) in os.walk(root_path): f.extend(filenames) return f def app_client(db_url="dbname=softwareheritage-dev-test"): """Setup the application ready for testing. """ content_storage_dir = tempfile.mkdtemp(prefix='test-swh-loader-git.', dir='/tmp') folder_depth = 2 api.app.config['conf'] = {'db_url': db_url, 'content_storage_dir': content_storage_dir, 'log_dir': '/tmp/swh-loader-git/log', 'folder_depth': folder_depth, 'debug': 'true', 'objstorage': ObjStorage(content_storage_dir, folder_depth) } api.app.config['TESTING'] = True app = api.app.test_client() test_initdb.prepare_db(db_url) return app, db_url, content_storage_dir def app_client_teardown(content_storage_dir): """Tear down app client's context. """ shutil.rmtree(content_storage_dir, ignore_errors=True)