diff --git a/swh/loader/git/git.py b/swh/loader/git/git.py index 0b8d525..ae32ee4 100644 --- a/swh/loader/git/git.py +++ b/swh/loader/git/git.py @@ -1,218 +1,218 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import glob import logging import os import subprocess import time import pygit2 from pygit2 import GIT_REF_OID from pygit2 import GIT_OBJ_COMMIT, GIT_OBJ_TREE, GIT_SORT_TOPOLOGICAL from enum import Enum from swh.core import hashutil from swh.loader.git import date from swh.loader.git.data import swhrepo from swh.loader.git.storage import storage def list_objects_from_packfile_index(packfile_index): """List the objects indexed by this packfile. """ input_file = open(packfile_index, 'rb') with subprocess.Popen( ['/usr/bin/git', 'show-index'], stdin=input_file, stdout=subprocess.PIPE, ) as process: for line in process.stdout.readlines(): obj_id = line.decode('utf-8', 'ignore').split()[1] yield obj_id def list_objects(repo): """List the objects in a given repository. """ objects_dir = os.path.join(repo.path, 'objects') objects_glob = os.path.join(objects_dir, '[0-9a-f]' * 2, '[0-9a-f]' * 38) packfile_dir = os.path.join(objects_dir, 'pack') if os.path.isdir(packfile_dir): for packfile_index in os.listdir(packfile_dir): if not packfile_index.endswith('.idx'): # Not an index file continue packfile_index_path = os.path.join(packfile_dir, packfile_index) yield from list_objects_from_packfile_index(packfile_index_path) for object_file in glob.glob(objects_glob): yield ''.join(object_file.split(os.path.sep)[-2:]) HASH_ALGORITHMS = ['sha1', 'sha256'] def parse(repo_path): """Given a repository path, parse and return a memory model of such repository. """ def read_signature(signature): return '%s <%s>' % (signature.name, signature.email) def treewalk(repo, tree): """Walk a tree with the same implementation as `os.path`. Returns: tree, trees, contents """ trees, contents, dir_entry_dirs, dir_entry_files = [], [], [], [] for tree_entry in tree: if swh_repo.already_visited(tree_entry.hex): logging.debug('tree_entry %s already visited,' ' skipped' % tree_entry.hex) continue obj = repo.get(tree_entry.oid) if obj is None: # or obj.type == GIT_OBJ_COMMIT: logging.warn('skip submodule-commit %s' % tree_entry.hex) continue # submodule! dir_entry = {'name': tree_entry.name, 'type': storage.Type.directory_entry, 'target-sha1': obj.hex, 'perms': tree_entry.filemode, 'atime': None, 'mtime': None, 'ctime': None} if obj.type == GIT_OBJ_TREE: logging.debug('found tree %s' % tree_entry.hex) trees.append(tree_entry) dir_entry_dirs.append(dir_entry) else: logging.debug('found content %s' % tree_entry.hex) data = obj.data hashes = hashutil.hashdata(data, HASH_ALGORITHMS) contents.append({'id': hashes['sha1'], 'type': storage.Type.content, 'git-sha1': obj.hex, 'content-sha256': hashes['sha256'], 'content': data, 'size': obj.size}) dir_entry_files.append(dir_entry) yield tree, dir_entry_dirs, dir_entry_files, trees, contents for tree_entry in trees: for x in treewalk(repo, repo[tree_entry.oid]): yield x def walk_tree(repo, swh_repo, rev): """Walk the rev revision's directories. """ for dir_root, dir_entry_dirs, dir_entry_files, _, contents_ref \ in treewalk(repo, rev.tree): for content_ref in contents_ref: swh_repo.add_content(content_ref) swh_repo.add_directory({'id': dir_root.hex, 'type': storage.Type.directory, 'entry-dirs': dir_entry_dirs, 'entry-files': dir_entry_files}) revision_parent_sha1s = list(map(str, rev.parent_ids)) author = {'name': rev.author.name, 'email': rev.author.email, 'type': storage.Type.person} committer = {'name': rev.committer.name, 'email': rev.committer.email, 'type': storage.Type.person} swh_repo.add_revision({'id': rev.hex, 'type': storage.Type.revision, 'date': date.ts_to_datetime( - rev.commit_time, - rev.commit_time_offset), - 'author_date': date.ts_to_datetime( rev.author.time, rev.author.offset), + 'committer-date': date.ts_to_datetime( + rev.commit_time, + rev.commit_time_offset), 'directory': rev.tree.hex, 'message': rev.message, 'committer': committer, 'author': author, 'parent-sha1s': revision_parent_sha1s }) swh_repo.add_person(read_signature(rev.author), author) swh_repo.add_person(read_signature(rev.committer), committer) return swh_repo def walk_revision_from(repo, swh_repo, head_rev): """Walk the rev history log from head_rev. - repo is the current repository - rev is the latest rev to start from. """ for rev in repo.walk(head_rev.id, GIT_SORT_TOPOLOGICAL): sha1 = rev.hex if swh_repo.already_visited(sha1): logging.debug('commit %s already visited, skipped' % sha1) continue swh_repo = walk_tree(repo, swh_repo, rev) return swh_repo repo = pygit2.Repository(repo_path) # memory model swh_repo = swhrepo.SWHRepo() # add origin origin = {'type': 'git', 'url': 'file://' + repo.path} swh_repo.add_origin(origin) # add references and crawl them for ref_name in repo.listall_references(): logging.info('walk reference %s' % ref_name) ref = repo.lookup_reference(ref_name) head_rev = repo[ref.target] \ if ref.type is GIT_REF_OID \ else ref.peel(GIT_OBJ_COMMIT) # noqa if isinstance(head_rev, pygit2.Tag): head_start = head_rev.get_object() taggerSig = head_rev.tagger author = {'name': taggerSig.name, 'email': taggerSig.email, 'type': storage.Type.person} release = {'id': head_rev.hex, 'type': storage.Type.release, 'revision': head_rev.target.hex, 'name': ref_name, 'date': date.ts_to_datetime(taggerSig.time, taggerSig.offset), 'author': author, 'comment': head_rev.message} swh_repo.add_release(release) swh_repo.add_person(read_signature(taggerSig), author) else: swh_repo.add_occurrence({'id': head_rev.hex, 'revision': head_rev.hex, 'branch': ref_name, 'url-origin': origin['url'], 'type': storage.Type.occurrence}) head_start = head_rev # crawl commits and trees walk_revision_from(repo, swh_repo, head_start) return swh_repo diff --git a/swh/loader/git/storage/models.py b/swh/loader/git/storage/models.py index ee2ec60..4ea1e0a 100644 --- a/swh/loader/git/storage/models.py +++ b/swh/loader/git/storage/models.py @@ -1,366 +1,368 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from enum import Enum from . import db class Type(Enum): """Types of git objects. """ occurrence = 'occurrence' # ~git branch release = 'release' # ~git annotated tag revision = 'revision' # ~git commit directory = 'directory' # ~git tree directory_entry = 'directory_entry' # ~git tree_entry content = 'content' # ~git blob origin = 'origin' person = 'person' # committer, tagger, author def initdb(db_conn): """For retrocompatibility. """ pass def cleandb(db_conn): """Clean up DB. """ db.queries_execute(db_conn, [ 'TRUNCATE TABLE content CASCADE;', 'TRUNCATE TABLE organization CASCADE;', 'TRUNCATE TABLE list_history CASCADE;', 'TRUNCATE TABLE origin CASCADE;', 'TRUNCATE TABLE fetch_history CASCADE;', 'TRUNCATE TABLE project CASCADE;', 'TRUNCATE TABLE project_history CASCADE;', 'TRUNCATE TABLE directory CASCADE;', 'TRUNCATE TABLE directory_entry_dir CASCADE;', 'TRUNCATE TABLE directory_list_dir CASCADE;', 'TRUNCATE TABLE directory_entry_file CASCADE;', 'TRUNCATE TABLE directory_list_file CASCADE;', 'TRUNCATE TABLE person CASCADE;', 'TRUNCATE TABLE revision CASCADE;', 'TRUNCATE TABLE revision_history CASCADE;', 'TRUNCATE TABLE occurrence_history CASCADE;', 'TRUNCATE TABLE occurrence CASCADE;', 'TRUNCATE TABLE release CASCADE;', ]) def add_origin(db_conn, url, type, parent=None): """Insert origin and returns the newly inserted id. """ return db.insert(db_conn, ("""INSERT INTO origin (type, url, parent_id) VALUES (%s, %s, %s) RETURNING id""", (type, url, parent))) def add_person(db_conn, name, email): """Insert author and returns the newly inserted id. """ return db.insert(db_conn, ("""INSERT INTO person (name, email) VALUES (%s, %s) RETURNING id""", (name, email))) def add_content(db_conn, sha1, sha1_git, sha256_content, size): """Insert a new content. """ db.query_execute(db_conn, ("""INSERT INTO content (sha1, sha1_git, sha256, length) VALUES (%s, %s, %s, %s)""", (sha1, sha1_git, sha256_content, size))) def add_directory(db_conn, obj_sha): """Insert a new directory. """ return db.insert(db_conn, ("""INSERT INTO directory (id) VALUES (%s) RETURNING id""", (obj_sha,))) def add_directory_entry_dir(db_conn, name, sha, perms, atime, mtime, ctime, parent_id): """Insert a new directory entry dir. """ dir_entry_id = db.insert(db_conn, ("""INSERT INTO directory_entry_dir (name, target, perms, atime, mtime, ctime) VALUES (%s, %s, %s, %s, %s, %s) RETURNING id""", (name, sha, perms, atime, mtime, ctime))) db.query_execute(db_conn, ("""INSERT INTO directory_list_dir (dir_id, entry_id) VALUES (%s, %s)""", (parent_id, dir_entry_id))) def add_directory_entry_file(db_conn, name, sha, perms, atime, mtime, ctime, parent_id): """Insert a new directory entry file. """ dir_entry_id = db.insert(db_conn, ("""INSERT INTO directory_entry_file (name, target, perms, atime, mtime, ctime) VALUES (%s, %s, %s, %s, %s, %s) RETURNING id""", (name, sha, perms, atime, mtime, ctime))) db.query_execute(db_conn, ("""INSERT INTO directory_list_file (dir_id, entry_id) VALUES (%s, %s)""", (parent_id, dir_entry_id))) -def add_revision(db_conn, sha, date, directory, message, author, committer, - parent_shas=None): +def add_revision(db_conn, sha, date, committer_date, directory, message, author, + committer, parent_shas=None): """Insert a revision. """ db.query_execute( db_conn, ("""INSERT INTO revision - (id, date, type, directory, message, author, committer) - VALUES (%s, %s, %s, %s, %s, + (id, date, committer_date, type, directory, message, + author, + committer) + VALUES (%s, %s, %s, %s, %s, %s, (select id from person where name=%s and email=%s), (select id from person where name=%s and email=%s))""", - (sha, date, 'git', directory, message, + (sha, date, committer_date, 'git', directory, message, author['name'], author['email'], committer['name'], committer['email']))) def add_revision_history(db_conn, tuple_parents): """Store the revision history graph. """ tuples = ','.join(["('%s','%s', %s)" % t for t in tuple_parents]) query = 'INSERT INTO revision_history ' + \ '(id, parent_id, parent_rank) VALUES ' + tuples db.query_execute(db_conn, query) def add_release(db_conn, obj_sha, revision, date, name, comment, author): """Insert a release. """ db.query_execute( db_conn, ("""INSERT INTO release (id, revision, date, name, comment, author) VALUES (%s, %s, %s, %s, %s, (select id from person where name=%s and email=%s))""", (obj_sha, revision, date, name, comment, author['name'], author['email']))) def add_occurrence(db_conn, url_origin, branch, revision): """Insert an occurrence. Check if occurrence history already present. If present do nothing, otherwise insert """ with db_conn.cursor() as cur: occ = find_occurrence(cur, branch, revision, url_origin) if not occ: db.execute( cur, ("""INSERT INTO occurrence (origin, branch, revision) VALUES ((select id from origin where url=%s), %s, %s)""", (url_origin, branch, revision))) def find_revision(db_conn, obj_sha): """Find a revision by its obj_sha. """ return find_object(db_conn, obj_sha, Type.revision) def find_directory(db_conn, obj_sha): """Find a directory by its obj_sha. """ return find_object(db_conn, obj_sha, Type.directory) def find_content(db_conn, obj_sha): """Find a content by its obj_sha. """ return find_object(db_conn, obj_sha, Type.content, column='sha1') def find_occurrences_for_revision(db_conn, revision, type): """Find all occurences for a specific revisions. type is not used (implementation detail). """ return db.query_fetch(db_conn, ("""SELECT * FROM occurrence WHERE revision=%s""", (revision,))) def find_origin(db_conn, origin_url, origin_type): """Find all origins matching an url and an origin type. """ return db.query_fetchone(db_conn, ("""SELECT * FROM origin WHERE url=%s AND type=%s""", (origin_url, origin_type))) def find_person(db_conn, email, name): """Find a person uniquely identified by email and name. """ return db.query_fetchone(db_conn, ("""SELECT id FROM person WHERE email=%s AND name=%s""", (email, name))) def find_occurrence(cur, branch, revision, url_origin): """Find an ocurrence with branch pointing on valid revision for date. """ return db.fetchone( cur, ("""SELECT * FROM occurrence oc WHERE branch=%s AND revision=%s AND origin = (select id from origin where url = %s)""", (branch, revision, url_origin))) def find_object(db_conn, obj_sha, obj_type, column='id'): """Find an object of obj_type by its obj_sha. """ table = obj_type if isinstance(obj_type, str) else obj_type.value query = 'select ' + column + ' from ' + table + ' where ' + column + '=%s' return db.query_fetchone(db_conn, (query, (obj_sha,))) def filter_unknown_objects(db_conn, file_sha1s, table_to_filter, tbl_tmp_name, column_to_filter='id', nature_column='sha1_git'): """Given a list of sha1s, filter the unknown object between this list and the content of the table table_to_filter. tbl_tmp_name is the temporary table used to filter. """ with db_conn.cursor() as cur: # explicit is better than implicit # simply creating the temporary table seems to be enough db.execute(cur, """CREATE TEMPORARY TABLE IF NOT EXISTS %s( %s %s) ON COMMIT DELETE ROWS;""" % (tbl_tmp_name, column_to_filter, nature_column)) db.copy_from(cur, file_sha1s, tbl_tmp_name) db.execute(cur, '(SELECT %s FROM %s) EXCEPT (SELECT %s FROM %s);' % (column_to_filter, tbl_tmp_name, column_to_filter, table_to_filter)) return cur.fetchall() def find_unknown_revisions(db_conn, file_sha1s): """Filter unknown revisions from file_sha1s. """ return filter_unknown_objects(db_conn, file_sha1s, 'revision', 'filter_sha1_revision') def find_unknown_directories(db_conn, file_sha1s): """Filter unknown directories from file_sha1s. """ return filter_unknown_objects(db_conn, file_sha1s, 'directory', 'filter_sha1_directory') def find_unknown_contents(db_conn, file_sha1s): """Filter unknown contents from file_sha1s. """ return filter_unknown_objects(db_conn, file_sha1s, 'content', 'filter_sha1_content', 'sha1', 'sha1') def _count_objects(db_conn, type): """Count the number of a given type object. """ return db.query_fetchone(db_conn, 'SELECT count(*) FROM ' + type.value)[0] def count_revisions(db_conn): """Count the number of revisions. """ return _count_objects(db_conn, Type.revision) def count_directories(db_conn): """Count the number of directories. """ return _count_objects(db_conn, Type.directory) def count_contents(db_conn): """Count the number of contents. """ return _count_objects(db_conn, Type.content) def count_occurrence(db_conn): """Count the number of occurrence. """ return _count_objects(db_conn, Type.occurrence) def count_release(db_conn): """Count the number of occurrence. """ return _count_objects(db_conn, Type.release) def count_person(db_conn): """Count the number of occurrence. """ return _count_objects(db_conn, Type.person) diff --git a/swh/loader/git/storage/storage.py b/swh/loader/git/storage/storage.py index ed5734c..88cbc27 100755 --- a/swh/loader/git/storage/storage.py +++ b/swh/loader/git/storage/storage.py @@ -1,220 +1,221 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import StringIO from . import models Type = models.Type _find_object = {Type.occurrence: models.find_occurrences_for_revision, Type.content: lambda *args: models.find_object(*args, column='sha1')} def find(db_conn, id, type): """Find an object according to its sha1hex and type. """ return _find_object.get(type, models.find_object)(db_conn, id, type) _find_unknown = {Type.revision: models.find_unknown_revisions, Type.content: models.find_unknown_contents, Type.directory: models.find_unknown_directories} def find_unknowns(db_conn, obj_type, sha1s_hex): """Given a list of sha1s, return the non presents one in storage. """ def row_to_sha1(row): """Convert a row (memoryview) to a string sha1. """ return row[0] vals = '\n'.join(sha1s_hex) cpy_data_buffer = StringIO() cpy_data_buffer.write(vals) cpy_data_buffer.seek(0) # move file cursor back at start of file find_unknown_fn = _find_unknown[obj_type] unknowns = find_unknown_fn(db_conn, cpy_data_buffer) cpy_data_buffer.close() return list(map(row_to_sha1, unknowns)) def _add_content(db_conn, vcs_object, sha1hex): """Add a blob to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_content(db_conn, sha1hex, vcs_object['git-sha1'], vcs_object['content-sha256'], vcs_object['size']) return sha1hex def _add_directory(db_conn, vcs_object, sha1hex): """Add a directory to storage. Designed to be wrapped in a db transaction. """ parent_id = models.add_directory(db_conn, sha1hex) for directory_entry_dir in vcs_object['entry-dirs']: _add_directory_entry_dir(db_conn, parent_id, directory_entry_dir) for directory_entry_file in vcs_object['entry-files']: _add_directory_entry_file(db_conn, parent_id, directory_entry_file) return sha1hex def _add_directory_entry_dir(db_conn, parent_id, vcs_object): """Add a directory entry dir to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ name = vcs_object['name'] models.add_directory_entry_dir(db_conn, name, vcs_object['target-sha1'], vcs_object['perms'], vcs_object['atime'], vcs_object['mtime'], vcs_object['ctime'], parent_id) return name, parent_id def _add_directory_entry_file(db_conn, parent_id, vcs_object): """Add a directory to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ name = vcs_object['name'] models.add_directory_entry_file(db_conn, name, vcs_object['target-sha1'], vcs_object['perms'], vcs_object['atime'], vcs_object['mtime'], vcs_object['ctime'], parent_id) return name, parent_id def _add_revision(db_conn, vcs_object, sha1hex): """Add a revision to storage. Designed to be wrapped in a db transaction. Returns: - the sha1 if everything went alright. - None if something went wrong Writing exceptions can also be raised and expected to be handled by the caller. """ models.add_revision(db_conn, sha1hex, vcs_object['date'], + vcs_object['committer-date'], vcs_object['directory'], vcs_object['message'], vcs_object['author'], vcs_object['committer'], vcs_object['parent-sha1s']) return sha1hex def _add_release(db_conn, vcs_object, sha1hex): """Add a release. """ models.add_release(db_conn, sha1hex, vcs_object['revision'], vcs_object['date'], vcs_object['name'], vcs_object['comment'], vcs_object['author']) return sha1hex def _add_occurrence(db_conn, vcs_object, sha1hex): """Add an occurrence. """ models.add_occurrence(db_conn, vcs_object['url-origin'], vcs_object['branch'], vcs_object['revision']) return sha1hex def add_person(db_conn, vcs_object): """Add an author. """ return models.add_person(db_conn, vcs_object['name'], vcs_object['email']) _store_fn = {Type.directory: _add_directory, Type.revision: _add_revision, Type.release: _add_release, Type.occurrence: _add_occurrence} def add_origin(db_conn, origin): """A a new origin and returns its id. """ return models.add_origin(db_conn, origin['url'], origin['type']) def find_origin(db_conn, origin): """Find an existing origin. """ return models.find_origin(db_conn, origin['url'], origin['type']) def find_person(db_conn, person): """Find an existing person. """ return models.find_person(db_conn, person['email'], person['name']) def add_with_fs_storage(db_conn, config, id, type, vcs_object): """Add vcs_object in the storage - db_conn is the opened connection to the db - config is the map of configuration needed for core layer - type is not used here but represent the type of vcs_object - vcs_object is the object meant to be persisted in fs and db """ config['objstorage'].add_bytes(vcs_object['content'], id) return _add_content(db_conn, vcs_object, id) def add(db_conn, config, id, type, vcs_object): """Given a sha1hex, type and content, store a given object in the store. - db_conn is the opened connection to the db - config is not used here - type is the object's type - vcs_object is the object meant to be persisted in db """ return _store_fn[type](db_conn, vcs_object, id) def add_revision_history(db_conn, tuple_parents): """Given a list of tuple (sha, parent_sha), store in revision_history. """ if len(tuple_parents) > 0: models.add_revision_history(db_conn, tuple_parents) diff --git a/swh/loader/git/tests/test_api_occurrence.py b/swh/loader/git/tests/test_api_occurrence.py index 6ed3818..69b8474 100644 --- a/swh/loader/git/tests/test_api_occurrence.py +++ b/swh/loader/git/tests/test_api_occurrence.py @@ -1,131 +1,133 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.loader.git.storage import db, models from swh.loader.git.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class OccurrenceTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), + now(), self.directory_sha1_hex, "revision message", authorAndCommitter, authorAndCommitter) self.origin_url = "https://github.com/user/repo" models.add_origin(db_conn, self.origin_url, 'git') self.reference_name = 'master' models.add_occurrence(db_conn, self.origin_url, self.reference_name, self.revision_sha1_hex) self.reference_name2 = 'master2' models.add_occurrence(db_conn, self.origin_url, self.reference_name2, self.revision_sha1_hex) self.revision_sha1_hex_2 = '2-revision-sha1-to-test-existence9994f71' models.add_revision(db_conn, self.revision_sha1_hex_2, now(), + now(), self.directory_sha1_hex, "revision message 2", authorAndCommitter, authorAndCommitter) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_occurrence_ok(self): # when rv = self.app.get('/vcs/occurrences/%s' % self.revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == [self.reference_name, self.reference_name2] @istest def get_occurrence_not_found(self): # when rv = self.app.get('/vcs/occurrences/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_occurrence_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/occurrences/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_occurrence_create_and_update(self): occ_revision_sha1_hex = self.revision_sha1_hex_2 rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'revision': occ_revision_sha1_hex, # FIXME: redundant with the one from uri.. 'branch': 'master', 'url-origin': self.origin_url}) rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, # ... here data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] # we update it rv = self.app.put('/vcs/occurrences/%s' % occ_revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # still the same rv = self.app.get('/vcs/occurrences/%s' % occ_revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data) == ['master'] diff --git a/swh/loader/git/tests/test_api_post_per_type.py b/swh/loader/git/tests/test_api_post_per_type.py index 87b4f3a..d8332de 100644 --- a/swh/loader/git/tests/test_api_post_per_type.py +++ b/swh/loader/git/tests/test_api_post_per_type.py @@ -1,213 +1,215 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.loader.git.storage import db, models from swh.loader.git.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class TestPostObjectsPerTypeCase(unittest.TestCase): def setUp(self): self.app, self.db_url, self.content_storage_dir = app_client() with db.connect(self.db_url) as db_conn: self.content_sha1_id = 'sha1-content0-6ee476a8be155ab049994f717e' self.content_sha256_hex = 'sha256-content0-e476a8be155ab049994f717e' models.add_content(db_conn, self.content_sha1_id, self.content_sha1_id, self.content_sha256_hex, 10) self.directory_sha1_hex = 'directory-sha1-ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, authorAndCommitter['name'], authorAndCommitter['email']) authorAndCommitter2 = {'name': 'tony', 'email': 'tony@dude.org'} models.add_person(db_conn, authorAndCommitter2['name'], authorAndCommitter2['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), + now(), self.directory_sha1_hex, "revision message", authorAndCommitter, authorAndCommitter) self.revision_sha1_hex2 = 'revision-sha1-2-for-testing-put-occurr' models.add_revision(db_conn, self.revision_sha1_hex2, now(), + now(), self.directory_sha1_hex, "revision message", authorAndCommitter2, authorAndCommitter2, parent_shas=['revision-sha1-to-test-existence9994f717e']) self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' models.add_release(db_conn, self.release_sha1_hex, self.revision_sha1_hex, now(), "0.0.1", "Super release tagged by tony", authorAndCommitter2) self.origin_url = "https://github.com/user/repo" models.add_origin(db_conn, self.origin_url, 'git') models.add_occurrence(db_conn, self.origin_url, 'master', self.revision_sha1_hex) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def post_all_non_presents_contents(self): # given # when payload = [self.content_sha1_id, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/contents/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 sha1s = serial.loads(rv.data) assert len(sha1s) is 2 # only 2 sha1s assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s @istest def post_all_non_presents_directories(self): # given # when payload = [self.directory_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/directories/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 sha1s = serial.loads(rv.data) assert len(sha1s) is 2 # only 2 sha1s assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s @istest def post_all_non_presents_revisions(self): # given # when payload = [self.revision_sha1_hex, self.revision_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/revisions/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 sha1s = serial.loads(rv.data) assert len(sha1s) is 2 # only 2 sha1s assert "666777f9dd5dc46ee476a8be155ab049994f717e" in sha1s assert "555444f9dd5dc46ee476a8be155ab049994f717e" in sha1s @istest def post_all_non_presents_releases(self): # given # when payload = [self.release_sha1_hex, self.release_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/releases/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 assert rv.data == b'Bad request. Type not supported!' @istest def post_all_non_presents_occurrences_KO(self): # given # when payload = [self.revision_sha1_hex, self.revision_sha1_hex, '555444f9dd5dc46ee476a8be155ab049994f717e', '555444f9dd5dc46ee476a8be155ab049994f717e', '666777f9dd5dc46ee476a8be155ab049994f717e'] query_payload = serial.dumps(payload) rv = self.app.post('/vcs/occurrences/', data=query_payload, headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 400 assert rv.data == b'Bad request. Type not supported!' @istest def post_non_presents_objects_empty_payload_so_empty_results(self): # given # when for api_type in ['contents', 'directories', 'revisions']: rv = self.app.post('/vcs/%s/' % api_type, data=serial.dumps({}), headers={'Content-Type': serial.MIMETYPE}) # then assert rv.status_code == 200 assert serial.loads(rv.data) == [] @istest def post_non_presents_objects_bad_requests_format_pickle(self): # given # when for api_type in ['contents', 'directories', 'revisions']: rv = self.app.post('/vcs/%s/' % api_type, data="not pickle -> fail") # then assert rv.status_code == 400 assert rv.data == b'Bad request. Expected application/octet-stream data!' diff --git a/swh/loader/git/tests/test_api_release.py b/swh/loader/git/tests/test_api_release.py index 54f8a52..d6c4ad7 100644 --- a/swh/loader/git/tests/test_api_release.py +++ b/swh/loader/git/tests/test_api_release.py @@ -1,119 +1,120 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.loader.git.storage import db, models from swh.loader.git.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class ReleaseTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.tagAuthor = {'name': 'tony', 'email': 'tony@mail.org'} models.add_person(db_conn, self.tagAuthor['name'], self.tagAuthor['email']) self.revision_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_sha1_hex, now(), + now(), self.directory_sha1_hex, "revision message", self.tagAuthor, self.tagAuthor) self.release_sha1_hex = 'release-sha1-to-test-existence1234567901' models.add_release(db_conn, self.release_sha1_hex, self.revision_sha1_hex, now(), "0.0.1", "Super release tagged by tony", self.tagAuthor) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_release_ok(self): # when rv = self.app.get('/vcs/releases/%s' % self.release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.release_sha1_hex @istest def get_release_not_found(self): # when rv = self.app.get('/vcs/releases/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_release_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/releases/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_release_create_and_update(self): release_sha1_hex = 'sha1-release46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'id': release_sha1_hex, 'revision': self.revision_sha1_hex, 'date': now(), 'name': '0.0.1', 'comment': 'super release tagged by ardumont', 'author': self.tagAuthor}) rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex # we update it rv = self.app.put('/vcs/releases/%s' % release_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # still the same rv = self.app.get('/vcs/releases/%s' % release_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == release_sha1_hex diff --git a/swh/loader/git/tests/test_api_revision.py b/swh/loader/git/tests/test_api_revision.py index 25603e6..4f90968 100644 --- a/swh/loader/git/tests/test_api_revision.py +++ b/swh/loader/git/tests/test_api_revision.py @@ -1,130 +1,134 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from nose.plugins.attrib import attr from swh.loader.git.storage import db, models from swh.loader.git.protocols import serial from test_utils import now, app_client, app_client_teardown @attr('slow') class RevisionTestCase(unittest.TestCase): def setUp(self): self.app, db_url, self.content_storage_dir = app_client() with db.connect(db_url) as db_conn: self.directory_sha1_hex = 'directory-sha16ee476a8be155ab049994f717e' models.add_directory(db_conn, self.directory_sha1_hex) self.authorAndCommitter = {'name': 'some-name', 'email': 'some-email'} models.add_person(db_conn, self.authorAndCommitter['name'], self.authorAndCommitter['email']) self.revision_parent_sha1_hex = 'revision-sha1-to-test-existence9994f717e' models.add_revision(db_conn, self.revision_parent_sha1_hex, now(), + now(), self.directory_sha1_hex, "revision message", self.authorAndCommitter, self.authorAndCommitter) self.revision_parent_2_sha1_hex = 'revision-sha1-to-test-as-parent-994f717e' models.add_revision(db_conn, self.revision_parent_2_sha1_hex, now(), + now(), self.directory_sha1_hex, "revision message 2", self.authorAndCommitter, self.authorAndCommitter) self.revision_parent_3_sha1_hex = 'revision-sha1-to-test-as-parent-3-4f717e' models.add_revision(db_conn, self.revision_parent_3_sha1_hex, now(), + now(), self.directory_sha1_hex, "revision message 3", self.authorAndCommitter, self.authorAndCommitter) def tearDown(self): app_client_teardown(self.content_storage_dir) @istest def get_revision_ok(self): # when rv = self.app.get('/vcs/revisions/%s' % self.revision_parent_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == self.revision_parent_sha1_hex @istest def get_revision_not_found(self): # when rv = self.app.get('/vcs/revisions/inexistant-sha1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def get_revision_not_found_with_bad_format(self): # when rv = self.app.get('/vcs/revisions/1') # then assert rv.status_code == 404 assert rv.data == b'Not found!' @istest def put_revision_create_and_update(self): revision_sha1_hex = 'sha1-revision46ee476a8be155ab049994f717e' rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 404 assert rv.data == b'Not found!' # we create it body = serial.dumps({'date': now(), + 'committer-date': now(), 'directory': self.directory_sha1_hex, 'message': 'revision message describing it', 'committer': self.authorAndCommitter, 'author': self.authorAndCommitter, 'parent-sha1s': [self.revision_parent_sha1_hex, self.revision_parent_3_sha1_hex, self.revision_parent_2_sha1_hex]}) rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # now it exists rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == revision_sha1_hex # we update it rv = self.app.put('/vcs/revisions/%s' % revision_sha1_hex, data=body, headers={'Content-Type': serial.MIMETYPE}) assert rv.status_code == 204 assert rv.data == b'' # still the same rv = self.app.get('/vcs/revisions/%s' % revision_sha1_hex) # then assert rv.status_code == 200 assert serial.loads(rv.data)['id'] == revision_sha1_hex