diff --git a/Makefile.local b/Makefile.local index 8095504..21becb7 100644 --- a/Makefile.local +++ b/Makefile.local @@ -1,93 +1,93 @@ FLAKE = flake8 BINDIR = bin SRCDIR = swh REPO_PATH=./swh-loader-git-testdata # add -v for example FLAG= TESTDIR = ./swh/tests DB=softwareheritage-dev DB_TEST=$(DB)-test SWH_LOADER=$(BINDIR)/swh-git-loader SWH_DB_MANAGER=$(BINDIR)/swh-db-manager SWH_BACK=$(BINDIR)/swh-backend SQL_FOLDER=../swh-storage/sql/ # could use cProfile PROFILE_TYPE=profile FOLLOW_LOG=-f # Adapt python-path to use other swh modules -_PYPATH=`pwd`:`pwd`/../swh-core +_PYPATH=`pwd`:`pwd`/../swh-core:`pwd`/../swh-storage deps: apt-get install -y \ python3 \ python3-pygit2 \ python3-psycopg2 \ python3-nose \ python3-flask \ python3-requests \ python3-retrying \ ipython3 cover: PYTHONPATH=$(_PYPATH) make coverage clean: rm -rf /tmp/swh-git-loader/content-storage cleandb: clean PYTHONPATH=$(_PYPATH) $(SWH_DB_MANAGER) $(FLAG) cleandb run-remote: PYTHONPATH=$(_PYPATH) $(SWH_LOADER) $(FLAG) --config ./resources/remote-git-loader.ini load $(REPO_PATH) run-local: PYTHONPATH=$(_PYPATH) $(SWH_LOADER) $(FLAG) --config ./resources/local-git-loader.ini load $(REPO_PATH) run: # works with the default ~/.config/swh/git-loader.ini file PYTHONPATH=$(_PYPATH) $(SWH_LOADER) $(FLAG) load $(REPO_PATH) run-back: PYTHONPATH=$(_PYPATH) $(SWH_BACK) $(FLAG) connect-db: psql -d $(DB) create-db: cd $(SQL_FOLDER) && make clean initdb drop-db: cd $(SQL_FOLDER) && make clean dropdb check-meta: @echo "Repository: $(REPO_PATH)" @echo "Git metadata:" @$(BINDIR)/dir-git-repo-meta.sh $(REPO_PATH) @echo @echo "DB metadata:" @$(BINDIR)/db-git-repo-meta.sh $(DB) $(REPO_PATH) @echo log-loader: tail $(FOLLOW_LOG) /tmp/swh-git-loader/log/sgloader.log log-back: tail $(FOLLOW_LOG) /tmp/swh-git-loader/log/back.log profile-run: PYTHONPATH=$(_PYPATH) python3 -m $(PROFILE_TYPE) -o ./scratch/swhgitloader.$(PROFILE_TYPE) ./scratch/profile-swhgitloader.py profile-stats: PYTHONPATH=$(_PYPATH) ./scratch/analyse-profile.py include Makefile.tests diff --git a/swh/file.py b/swh/file.py deleted file mode 100644 index aa64d6a..0000000 --- a/swh/file.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import os -import gzip - -from retrying import retry -from swh.retry import policy - - -def folder_path(prefix_dir, hexhash, depth=4): - """Compute the folder prefix from a hexhash key. - The depth determines the number of subfolder from prefix_dir. - Default to 4. - Example: - - prefix_dir: /some/path - - depth: 2 - - hash: aabbccddeeffgghhii - -> folder: /some/path/aa/bb/ - """ - hexhashes = [hexhash[x:x+2] for x in range(0, 2*depth, 2)] - return os.path.join(prefix_dir, *hexhashes) - - -@retry(retry_on_exception=policy.retry_if_io_error, wrap_exception=True) -def write_data(bytes, path, comp_flag=None): - """Write bytes to path. - If compress_path is not None, gzip the bytes. - - If an IOError is raised, this function will be triggered immediately - again. - Otherwise, if any other error is raised, the error will be wrapped in - RetryError. - """ - with (gzip.open(path, 'wb') if comp_flag else open(path, 'wb')) as f: - return f.write(bytes) diff --git a/swh/store/fs.py b/swh/store/fs.py deleted file mode 100644 index f33d37d..0000000 --- a/swh/store/fs.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import logging -import os - - -from swh import file - - -def create_dir_from_hash(storage_dir, hashv, folder_depth): - """Create directory in folder storage_dir from a given hash value. - - storage_dir: prefix folder path to store data - - hashv: hash value to use as suffix-folder - - depth: depth to extract suffix from hashv - """ - folder_in_storage = file.folder_path(storage_dir, hashv, - folder_depth) - os.makedirs(folder_in_storage, exist_ok=True) - return folder_in_storage - - -def write_object(dataset_dir, hashv, data, folder_depth, compress_flag=None): - """Write object with data and hashv on disk in dataset_dir. - - storage_dir: prefix folder path to store data - - hashv: hash value to use as suffix-folder - - data: data to store on disk - - depth: depth to extract suffix from hashv - - compress_flag: is the data to be compressed (gzipped) or not - """ - folder_in_storage = create_dir_from_hash(dataset_dir, hashv, folder_depth) - filepath = os.path.join(folder_in_storage, hashv) - logging.debug('write object %s' % filepath) - return file.write_data(data, filepath, compress_flag) diff --git a/swh/store/store.py b/swh/store/store.py index 8d4f6a1..fcdbce6 100755 --- a/swh/store/store.py +++ b/swh/store/store.py @@ -1,199 +1,196 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import StringIO -from swh.store import models, fs + +from swh.store import models +from swh.storage.objstorage import ObjStorage Type = models.Type _find_object = {Type.occurrence: models.find_occurrences_for_revision} def find(db_conn, vcs_object): """Find an object according to its sha1hex and type. """ id = vcs_object['id'] # sha1 for every object except for origin (url) type = vcs_object['type'] find_fn = _find_object.get(type, models.find_object) return find_fn(db_conn, id, type) _find_unknown = {Type.revision: models.find_unknown_revisions, Type.content: models.find_unknown_contents, Type.directory: models.find_unknown_directories} def find_unknowns(db_conn, obj_type, sha1s_hex): """Given a list of sha1s, return the non presents one in storage. """ def row_to_sha1(row): """Convert a row (memoryview) to a string sha1. """ return row[0] vals = '\n'.join(sha1s_hex) cpy_data_buffer = StringIO() cpy_data_buffer.write(vals) cpy_data_buffer.seek(0) # move file cursor back at start of file find_unknown_fn = _find_unknown[obj_type] unknowns = find_unknown_fn(db_conn, cpy_data_buffer) cpy_data_buffer.close() return list(map(row_to_sha1, unknowns)) def _add_content(db_conn, vcs_object, sha1hex): """Add a blob to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_content(db_conn, sha1hex, vcs_object['content-sha1'], vcs_object['content-sha256'], vcs_object['size']) return sha1hex def _add_directory(db_conn, vcs_object, sha1hex): """Add a directory to storage. Designed to be wrapped in a db transaction. """ models.add_directory(db_conn, sha1hex) for directory_entry in vcs_object['entries']: _add_directory_entry(db_conn, directory_entry) return sha1hex def _add_directory_entry(db_conn, vcs_object): """Add a directory to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ name = vcs_object['name'] parent = vcs_object['parent'] models.add_directory_entry(db_conn, name, vcs_object['target-sha1'], vcs_object['nature'], vcs_object['perms'], vcs_object['atime'], vcs_object['mtime'], vcs_object['ctime'], parent) return name, parent def _add_revision(db_conn, vcs_object, sha1hex): """Add a revision to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_revision(db_conn, sha1hex, vcs_object['date'], vcs_object['directory'], vcs_object['message'], vcs_object['author'], vcs_object['committer'], vcs_object['parent-sha1s']) return sha1hex def _add_release(db_conn, vcs_object, sha1hex): """Add a release. """ models.add_release(db_conn, sha1hex, vcs_object['revision'], vcs_object['date'], vcs_object['name'], vcs_object['comment'], vcs_object['author']) return sha1hex def _add_occurrence(db_conn, vcs_object, sha1hex): """Add an occurrence. """ models.add_occurrence(db_conn, vcs_object['url-origin'], vcs_object['reference'], vcs_object['revision']) return sha1hex def add_person(db_conn, vcs_object): """Add an author. """ return models.add_person(db_conn, vcs_object['name'], vcs_object['email']) _store_fn = {Type.content: _add_content, Type.directory: _add_directory, Type.revision: _add_revision, Type.release: _add_release, Type.occurrence: _add_occurrence} def add_origin(db_conn, origin): """A a new origin and returns its id. """ return models.add_origin(db_conn, origin['url'], origin['type']) def find_origin(db_conn, origin): """Find an existing origin. """ return models.find_origin(db_conn, origin['url'], origin['type']) def find_person(db_conn, person): """Find an existing person. """ return models.find_person(db_conn, person['email'], person['name']) - + def add(db_conn, config, vcs_object): """Given a sha1hex, type and content, store a given object in the store. """ type = vcs_object['type'] sha1hex = vcs_object['id'] obj_content = vcs_object.get('content') - + obj_storage = ObjStorage(config['content_storage_dir'], config['folder_depth']) # FIXME: Add this in loaders + if obj_content: - res = fs.write_object(config['content_storage_dir'], - sha1hex, - obj_content, - config['folder_depth'], - config['storage_compression']) - if not res: - return False + obj_storage.add_bytes(obj_content, sha1hex) return _store_fn[type](db_conn, vcs_object, sha1hex) return _store_fn[type](db_conn, vcs_object, sha1hex) def add_revision_history(db_conn, couple_parents): """Given a list of tuple (sha, parent_sha), store in revision_history. """ if len(couple_parents) > 0: models.add_revision_history(db_conn, couple_parents) diff --git a/swh/tests/test_api_content.py b/swh/tests/test_api_content.py index 4927715..2e3c921 100644 --- a/swh/tests/test_api_content.py +++ b/swh/tests/test_api_content.py @@ -1,107 +1,110 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial -from test_utils import app_client +from test_utils import app_client, app_client_teardown @attr('slow') class ContentTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.content_sha1_id = '222222f9dd5dc46ee476a8be155ab049994f717e' content_sha1_id = 'blabliblablo' self.content_sha256_hex = '222222f9dd5dc46ee476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, content_sha1_id, self.content_sha256_hex, 10) + def tearDown(self): + app_client_teardown(self.content_storage_dir) + @istest def get_content_ok(self): # when rv = self.app.get('/vcs/contents/%s' % self.content_sha1_id) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == '222222f9dd5dc46ee476a8be155ab049994f717e' @istest def get_content_not_found(self): # when rv = self.app.get('/vcs/contents/222222f9dd5dc46ee476a8be155ab049994f7170') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_content_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/contents/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_content_create_and_update(self): content_sha1 = 'sha1-contentc46ee476a8be155ab03333333333' # does not exist rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = {'id': content_sha1, 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', 'content': b'bar', 'size': '3'} rv = self.app.put('/vcs/contents/%s' % content_sha1, data=serial.dumps(body), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' # # we update it body = {'id': content_sha1, 'content-sha1': 'content-sha1c46ee476a8be155ab03333333333', 'content-sha256': 'content-sha2566ee476a8be155ab03333333333', 'content': b'bar', 'size': '3'} rv = self.app.put('/vcs/contents/%s' % content_sha1, data=serial.dumps(body), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/contents/%s' % content_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'sha1-contentc46ee476a8be155ab03333333333' diff --git a/swh/tests/test_api_directory.py b/swh/tests/test_api_directory.py index 5483aaa..c1082ae 100644 --- a/swh/tests/test_api_directory.py +++ b/swh/tests/test_api_directory.py @@ -1,117 +1,120 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial -from test_utils import now, app_client +from test_utils import now, app_client, app_client_teardown @attr('slow') class DirectoryTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.content_sha1_id = 'content-sha1c46ee476a8be155ab049994f717e' content_sha1_hex = 'content-sha1c46ee476a8be155ab049994f717e' content_sha256_hex = 'content-sha2566ee476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, content_sha1_hex, content_sha256_hex, 10) self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) + def tearDown(self): + app_client_teardown(self.content_storage_dir) + @istest def get_directory_ok(self): # when rv = self.app.get('/vcs/directories/%s' % self.directory_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f717e' @istest def get_directory_not_found(self): # when rv = self.app.get('/vcs/directories/111111f9dd5dc46ee476a8be155ab049994f7170') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_directory_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/directories/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_directory_create_and_update(self): directory_sha1='directory-sha16ee476a8be155ab049994f7170' # does not exist rv = self.app.get('/vcs/directories/%s' % directory_sha1) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'content': b'directory has content too.', 'entries': [{'name': 'filename', 'target-sha1': self.content_sha1_id, 'nature': 'file', 'perms': '000', 'atime': now(), 'mtime': now(), 'ctime': now(), 'parent': directory_sha1}, {'name': 'dirname', 'target-sha1': self.directory_sha1_hex, 'nature': 'directory', 'perms': '012', 'atime': now(), 'mtime': now(), 'ctime': now(), 'parent': directory_sha1} ]}) rv = self.app.put('/vcs/directories/%s' % directory_sha1, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/directories/%s' % directory_sha1) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' # we update it rv = self.app.put('/vcs/directories/directory-sha16ee476a8be155ab049994f7170', data=serial.dumps({'entry': 'directory-bar'}), headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/directories/directory-sha16ee476a8be155ab049994f7170') # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == 'directory-sha16ee476a8be155ab049994f7170' diff --git a/swh/tests/test_api_home.py b/swh/tests/test_api_home.py index 984fa1c..3352151 100644 --- a/swh/tests/test_api_home.py +++ b/swh/tests/test_api_home.py @@ -1,43 +1,43 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from test_utils import app_client @attr('slow') class HomeTestCase(unittest.TestCase): def setUp(self): - self.app, _ = app_client() + self.app, _, _ = app_client() @istest def get_slash(self): # when rv = self.app.get('/') # then assert rv.status_code == 200 assert rv.data == b'Dev SWH API' @istest def get_404(self): # when rv = self.app.get('/nowhere') # then assert rv.status_code == 404 @istest def get_bad_request(self): # when rv = self.app.get('/vcs/not-a-good-type/1') # then assert rv.status_code == 400 assert rv.data == b'Bad request!' diff --git a/swh/tests/test_api_occurrence.py b/swh/tests/test_api_occurrence.py index 60bafcb..30b95cc 100644 --- a/swh/tests/test_api_occurrence.py +++ b/swh/tests/test_api_occurrence.py @@ -1,130 +1,132 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial -from test_utils import now, app_client +from test_utils import now, app_client, app_client_teardown @attr('slow') class OccurrenceTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", authorAndCommitter, authorAndCommitter) self.origin_url = "https://github.com/user/repo" models.add_origin(db_conn, self.origin_url, 'git') self.reference_name = 'master' models.add_occurrence(db_conn, self.origin_url, self.reference_name, self.revision_sha1_hex) self.reference_name2 = 'master2' models.add_occurrence(db_conn, self.origin_url, self.reference_name2, self.revision_sha1_hex) self.revision_sha1_hex_2 = '2-revision-sha1-to-test-existence9994f71' models.add_revision(db_conn, self.revision_sha1_hex_2, now(), self.directory_sha1_hex, "revision message 2", authorAndCommitter, authorAndCommitter) + def tearDown(self): + app_client_teardown(self.content_storage_dir) @istest def get_occurrence_ok(self): # when rv = self.app.get('/vcs/occurrences/%s' % self.revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == [self.reference_name, self.reference_name2] @istest def get_occurrence_not_found(self): # when rv = self.app.get('/vcs/occurrences/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_occurrence_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/occurrences/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_occurrence_create_and_update(self): occ_revision_sha1_hex = self.revision_sha1_hex_2 rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'content': b'occurrence content', 'revision': occ_revision_sha1_hex, # FIXME: redundant with the one from uri.. 'reference': 'master', 'url-origin': self.origin_url}) rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, # ... here data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] # we update it rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] diff --git a/swh/tests/test_api_origin.py b/swh/tests/test_api_origin.py index 9f77f3b..c34adbf 100644 --- a/swh/tests/test_api_origin.py +++ b/swh/tests/test_api_origin.py @@ -1,98 +1,98 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial from test_utils import app_client @attr('slow') class OriginTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, _ = app_client() with db.connect(db_url) as db_conn: self.origin_url = 'https://github.com/torvalds/linux.git' self.origin_type = 'git' self.origin_id = models.add_origin(db_conn, self.origin_url, self.origin_type) @istest def get_origin_ok(self): # when payload = {'url': self.origin_url, 'type': self.origin_type} rv = self.app.post('/origins/', data=serial.dumps(payload), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.origin_id @istest def get_origin_not_found(self): # when payload = {'url': 'unknown', 'type': 'blah'} rv = self.app.post('/origins/', data=serial.dumps(payload), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 404 assert rv.data == b'Origin not found!' @istest def get_origin_not_found_with_bad_format(self): # when rv = self.app.post('/origins/', data=serial.dumps({'url': 'unknown'}), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 @istest def put_origin(self): # when payload = {'url': 'unknown', 'type': 'blah'} rv = self.app.post('/origins/', data=serial.dumps(payload), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 404 assert rv.data == b'Origin not found!' # when rv = self.app.put('/origins/', data=serial.dumps(payload), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 # FIXME: 201 assert serial.loads(rv.data)['id'] payload = {'url': 'unknown', 'type': 'blah'} rv = self.app.post('/origins/', data=serial.dumps(payload), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 origin_id = serial.loads(rv.data)['id'] assert origin_id # when rv = self.app.put('/origins/', data=serial.dumps(payload), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 # FIXME: 204 assert serial.loads(rv.data)['id'] == origin_id diff --git a/swh/tests/test_api_person.py b/swh/tests/test_api_person.py index 0107444..4b81d60 100644 --- a/swh/tests/test_api_person.py +++ b/swh/tests/test_api_person.py @@ -1,98 +1,98 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial from test_utils import app_client @attr('slow') class PersonTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, _ = app_client() with db.connect(db_url) as db_conn: self.person_name = 'some-name' self.person_email = 'some@mail.git' self.person_id = models.add_person(db_conn, self.person_name, self.person_email) @istest def get_person_ok(self): # when person = {'name': self.person_name, 'email': self.person_email} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.person_id @istest def get_person_not_found(self): # when person = {'name': 'unknown', 'email': 'blah'} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 404 assert rv.data == b'Person not found!' @istest def get_person_not_found_with_bad_format(self): # when rv = self.app.post('/vcs/persons/', data=serial.dumps({'name': 'unknown'}), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 @istest def put_person(self): # when person = {'name': 'unknown', 'email': 'blah'} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 404 assert rv.data == b'Person not found!' # when rv = self.app.put('/vcs/persons/', data=serial.dumps([person]), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 204 assert rv.data == b'' person = {'name': 'unknown', 'email': 'blah'} rv = self.app.post('/vcs/persons/', data=serial.dumps(person), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 person_id = serial.loads(rv.data)['id'] assert person_id # when rv = self.app.put('/vcs/persons/', data=serial.dumps([person, person]), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 204 assert rv.data == b'' diff --git a/swh/tests/test_api_post_per_type.py b/swh/tests/test_api_post_per_type.py index ab528e3..25f6ab4 100644 --- a/swh/tests/test_api_post_per_type.py +++ b/swh/tests/test_api_post_per_type.py @@ -1,210 +1,213 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial -from test_utils import now, app_client +from test_utils import now, app_client, app_client_teardown @attr('slow') class TestPostObjectsPerTypeCase(unittest.TestCase): def setUp(self): - self.app, self.db_url = app_client() + self.app, self.db_url, self.content_storage_dir = app_client() with db.connect(self.db_url) as db_conn: self.content_sha1_id = 'sha1-content0-6ee476a8be155ab049994f717e' self.content_sha256_hex = 'sha256-content0-e476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, self.content_sha1_id, self.content_sha256_hex, 10) self.directory_sha1_hex = 'directory-sha1-ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) - + authorAndCommitter2 = {'name': 'tony', 'email': 'tony@dude.org'} models.add_person(db_conn, authorAndCommitter2['name'], authorAndCommitter2['email']) - + self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", authorAndCommitter, authorAndCommitter) self.revision_sha1_hex2 = 'revision-sha1-2-for-testing-put-occurr' models.add_revision(db_conn, self.revision_sha1_hex2, now(), self.directory_sha1_hex, "revision message", authorAndCommitter2, authorAndCommitter2, parent_shas=['revision-sha1-to-test-existence9994f717e']) self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' models.add_release(db_conn, self.release_sha1_hex, self.revision_sha1_hex, now(), "0.0.1", "Super release tagged by tony", authorAndCommitter2) self.origin_url = "https://github.com/user/repo" models.add_origin(db_conn, self.origin_url, 'git') models.add_occurrence(db_conn, self.origin_url, 'master', self.revision_sha1_hex) + def tearDown(self): + app_client_teardown(self.content_storage_dir) + @istest def post_all_non_presents_contents(self): # given # when payload = [self.content_sha1_id, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/contents/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 sha1s = serial.loads(rv.data) assert len(sha1s) is 2 # only 2 sha1s assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s @istest def post_all_non_presents_directories(self): # given # when payload = [self.directory_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/directories/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 sha1s = serial.loads(rv.data) assert len(sha1s) is 2 # only 2 sha1s assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s @istest def post_all_non_presents_revisions(self): # given # when payload = [self.revision_sha1_hex, self.revision_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/revisions/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 sha1s = serial.loads(rv.data) assert len(sha1s) is 2 # only 2 sha1s assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s @istest def post_all_non_presents_releases(self): # given # when payload = [self.release_sha1_hex, self.release_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/releases/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 assert rv.data == b'Bad request. Type not supported!' @istest def post_all_non_presents_occurrences_KO(self): # given # when payload = [self.revision_sha1_hex, self.revision_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/occurrences/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 assert rv.data == b'Bad request. Type not supported!' @istest def post_non_presents_objects_empty_payload_so_empty_results(self): # given # when for api_type in ['contents', 'directories', 'revisions']: rv = self.app.post('/vcs/%s/' % api_type, data=serial.dumps({}), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 assert serial.loads(rv.data) == [] @istest def post_non_presents_objects_bad_requests_format_pickle(self): # given # when for api_type in ['contents', 'directories', 'revisions']: rv = self.app.post('/vcs/%s/' % api_type, data="not pickle -> fail") # then assert rv.status_code == 400 assert rv.data == b'Bad request. Expected application/octet-stream data!' diff --git a/swh/tests/test_api_release.py b/swh/tests/test_api_release.py index ec9bc8d..31b051d 100644 --- a/swh/tests/test_api_release.py +++ b/swh/tests/test_api_release.py @@ -1,117 +1,120 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial -from test_utils import now, app_client +from test_utils import now, app_client, app_client_teardown @attr('slow') class ReleaseTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.tagAuthor = {'name': 'tony', 'email': 'tony@mail.org'} models.add_person(db_conn, self.tagAuthor['name'], self.tagAuthor['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", self.tagAuthor, self.tagAuthor) self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' models.add_release(db_conn, self.release_sha1_hex, self.revision_sha1_hex, now(), "0.0.1", "Super release tagged by tony", self.tagAuthor) + def tearDown(self): + app_client_teardown(self.content_storage_dir) + @istest def get_release_ok(self): # when rv = self.app.get('/vcs/releases/%s' % self.release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.release_sha1_hex @istest def get_release_not_found(self): # when rv = self.app.get('/vcs/releases/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_release_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/releases/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_release_create_and_update(self): release_sha1_hex = 'sha1-release46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'id': release_sha1_hex, 'content': b'release also has content', 'revision': self.revision_sha1_hex, 'date': now(), 'name': '0.0.1', 'comment': 'super release tagged by ardumont', 'author': self.tagAuthor}) rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex # we update it rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex diff --git a/swh/tests/test_api_revision.py b/swh/tests/test_api_revision.py index 3c2162a..dc2a30a 100644 --- a/swh/tests/test_api_revision.py +++ b/swh/tests/test_api_revision.py @@ -1,108 +1,111 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.store import db, models from swh.protocols import serial -from test_utils import now, app_client +from test_utils import now, app_client, app_client_teardown @attr('slow') class RevisionTestCase(unittest.TestCase): def setUp(self): - self.app, db_url = app_client() + self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, self.authorAndCommitter['name'], self.authorAndCommitter['email']) - + self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), self.directory_sha1_hex, "revision message", self.authorAndCommitter, self.authorAndCommitter) + def tearDown(self): + app_client_teardown(self.content_storage_dir) + @istest def get_revision_ok(self): # when rv = self.app.get('/vcs/revisions/%s' % self.revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.revision_sha1_hex @istest def get_revision_not_found(self): # when rv = self.app.get('/vcs/revisions/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_revision_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/revisions/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_revision_create_and_update(self): revision_sha1_hex = 'sha1-revision46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'content': b'revision has content too.', 'date': now(), 'directory': self.directory_sha1_hex, 'message': 'revision message describing it', 'committer': self.authorAndCommitter, 'author': self.authorAndCommitter, 'parent-sha1s': [self.revision_sha1_hex]}) rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == revision_sha1_hex # we update it rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 200 assert rv.data == b'Successful update!' # still the same rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == revision_sha1_hex diff --git a/swh/tests/test_file.py b/swh/tests/test_file.py deleted file mode 100644 index cb086a3..0000000 --- a/swh/tests/test_file.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (C) 2015 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import unittest -import tempfile -import gzip -import os - -from nose.tools import istest -from nose.plugins.attrib import attr - -from swh import file - - -@attr('slow') -class TestFile(unittest.TestCase): - @istest - def check_folder_path_computation(self): - # when - fpath = file.folder_path('prefix-dir', 'aabbccdd0123456789') - - # then - self.assertEquals(fpath, - 'prefix-dir/aa/bb/cc/dd', - 'Depth should be 4 because not specified') - - @istest - def check_folder_path_computation_with_different_deps(self): - # when - fpath = file.folder_path('prefix-dir', 'aabb01234567890', 2) - - # then - self.assertEquals(fpath, - 'prefix-dir/aa/bb', - 'Depth is now 2') - - @istest - def check_write_data_with_no_compress_flag(self): - # given - _, tmpfile = tempfile.mkstemp(prefix='swh-git-loader.', - dir='/tmp') - - # when - res = file.write_data(b'some data to write', tmpfile) - - # then - assert res is len('some data to write') - - with open(tmpfile, 'r') as f: - self.assertEquals('some data to write', - f.read(), - 'Data read should be the same!') - - # cleanup - os.remove(tmpfile) - - @istest - def check_write_data_with_compress_flag_on(self): - # given - _, tmpfile = tempfile.mkstemp(prefix='swh-git-loader.', - dir='/tmp') - - # when - res = file.write_data(b'some data to write compressed', tmpfile, True) - - # then - assert res is len('some data to write compressed') - - with gzip.open(tmpfile, 'r') as f: - self.assertEquals('some data to write compressed'.encode('utf-8'), - f.read(), - 'Compressed data read should be the same!') - - # cleanup - os.remove(tmpfile) diff --git a/swh/tests/test_remote_loader.py b/swh/tests/test_remote_loader.py index 2ad6803..0b7d30f 100644 --- a/swh/tests/test_remote_loader.py +++ b/swh/tests/test_remote_loader.py @@ -1,242 +1,248 @@ # coding: utf-8 # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pygit2 import tempfile import shutil +import os from nose.plugins.attrib import attr from nose.tools import istest from swh.store import db, models from swh.gitloader import loader from swh.conf import reader import test_initdb from test_git_utils import create_commit_with_content, create_tag from test_utils import list_files_from + @attr('slow') class TestRemoteLoader(unittest.TestCase): def setUp(self): """Initialize a git repository for the remaining test to manipulate. """ tmp_git_folder_path = tempfile.mkdtemp(prefix='test-sgloader.', dir='/tmp') self.tmp_git_repo = pygit2.init_repository(tmp_git_folder_path) self.conf = reader.read('./resources/test/back.ini', {'port': ('int', 9999)}) self.db_url = self.conf['db_url'] self.conf.update({ 'action': 'load', 'repo_path': self.tmp_git_repo.workdir, 'backend-type': 'remote', 'backend': 'http://localhost:%s' % self.conf['port'] }) + # Not the remote loader in charge of creating the folder, so we do it + if not os.path.exists(self.conf['content_storage_dir']): + os.mkdir(self.conf['content_storage_dir']) + test_initdb.prepare_db(self.db_url) def tearDown(self): """Destroy the test git repository. """ shutil.rmtree(self.tmp_git_repo.workdir) - shutil.rmtree(self.conf['content_storage_dir'], ignore_errors=True) + shutil.rmtree(self.conf['content_storage_dir']) @istest def should_fail_on_bad_action(self): # when try: loader.load({'action': 'unknown'}) except: pass @istest def should_fail_on_inexistant_folder(self): # when try: loader.load({'action': 'load', 'repo_path': 'something-that-definitely-does-not-exist'}) except: pass @istest def should_fail_on_inexistant_backend_type(self): # when try: loader.load({'action': 'load', 'repo_path': '.', 'backend-type': 'unknown'}) # only local or remote supported except: pass @istest def remote_loader(self): """Trigger loader and make sure everything is ok. """ # given commit0 = create_commit_with_content(self.tmp_git_repo, 'blob 0', 'commit msg 0') commit1 = create_commit_with_content(self.tmp_git_repo, 'blob 1', 'commit msg 1', [commit0.hex]) commit2 = create_commit_with_content(self.tmp_git_repo, 'blob 2', 'commit msg 2', [commit1.hex]) commit3 = create_commit_with_content(self.tmp_git_repo, None, 'commit msg 3', [commit2.hex]) commit4 = create_commit_with_content(self.tmp_git_repo, 'blob 4', 'commit msg 4', [commit3.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 4+5+4, "4 blobs, 4 trees, 5 commits were created so 13 files.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 5, "Should be 5 commits") self.assertEquals( models.count_directories(db_conn), 5, "Should be 5 trees") self.assertEquals( models.count_contents(db_conn), 4, "Should be 4 blobs as we created one commit without data!") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 1, "Should be 1 reference (master) so 1 occurrence.") # given commit5 = create_commit_with_content(self.tmp_git_repo, 'new blob 5', 'commit msg 5', [commit4.hex]) commit6 = create_commit_with_content(self.tmp_git_repo, 'new blob and last 6', 'commit msg 6', [commit5.hex]) commit7 = create_commit_with_content(self.tmp_git_repo, 'new blob 7', 'commit msg 7', [commit6.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 13+3+3+3, "3 commits + 3 trees + 3 blobs so 9 more.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 8, "Should be 5+3 == 8 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 5+3 == 8 trees") self.assertEquals( models.count_contents(db_conn), 7, "Should be 4+3 == 7 blobs") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 2, "Should be 1 reference which changed twice so 2 occurrences (master changed).") # given create_commit_with_content(self.tmp_git_repo, None, 'commit 8 with parent 2', [commit7.hex]) # when loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 22+1, "1 commit more without blob so no tree either.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 0, "No tag created so 0 release.") self.assertEquals( models.count_occurrence(db_conn), 3, "Should be 1 reference which changed thrice so 3 occurrences (master changed again).") self.assertEquals( models.count_person(db_conn), 2, "1 author + 1 committer") # add tag create_tag(self.tmp_git_repo, '0.0.1', commit5, 'bad ass release 0.0.1, towards infinity...') create_tag(self.tmp_git_repo, '0.0.2', commit7, 'release 0.0.2... and beyond') loader.load(self.conf) # then nb_files = len(list_files_from(self.conf['content_storage_dir'])) self.assertEquals(nb_files, 23+2, "2 tags more.") with db.connect(self.db_url) as db_conn: self.assertEquals( models.count_revisions(db_conn), 9, "Should be 8+1 == 9 commits now") self.assertEquals( models.count_directories(db_conn), 8, "Should be 8 trees (new commit without blob so no new tree)") self.assertEquals( models.count_contents(db_conn), 7, "Should be 7 blobs (new commit without new blob)") self.assertEquals( models.count_release(db_conn), 2, "Should be 2 annotated tags so 2 releases") self.assertEquals( models.count_occurrence(db_conn), 3, "master did not change this time so still 3 occurrences") self.assertEquals( models.count_person(db_conn), 3, "1 author + 1 committer + 1 tagger") diff --git a/swh/tests/test_utils.py b/swh/tests/test_utils.py index a1831b8..56f833d 100644 --- a/swh/tests/test_utils.py +++ b/swh/tests/test_utils.py @@ -1,42 +1,55 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import time import os - +import shutil +import tempfile from swh.backend import api import test_initdb def now(): - "Build the date as of now in the api's format." + """Build the date as of now in the api's format. + + """ return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) def list_files_from(root_path): """Compute the list of files from root_path. """ f = [] for (dirpath, dirnames, filenames) in os.walk(root_path): f.extend(filenames) return f def app_client(db_url="dbname=softwareheritage-dev-test"): """Setup the application ready for testing. + """ + content_storage_dir = tempfile.mkdtemp(prefix='test-swh-git-loader.', + dir='/tmp') api.app.config['conf'] = {'db_url': db_url, - 'content_storage_dir': '/tmp/swh-git-loader/content-storage', - 'log_dir': '/tmp/swh-git-loader/log', - 'folder_depth': 2, - 'storage_compression': None, - 'debug': 'true'} + 'content_storage_dir': content_storage_dir, + 'log_dir': '/tmp/swh-git-loader/log', + 'folder_depth': 2, + 'storage_compression': None, + 'debug': 'true'} api.app.config['TESTING'] = True app = api.app.test_client() test_initdb.prepare_db(db_url) - return app, db_url + return app, db_url, content_storage_dir + + +def app_client_teardown(content_storage_dir): + """Tear down app client's context. + + """ + shutil.rmtree(content_storage_dir, ignore_errors=True)