diff --git a/swh/loader/__init__.py b/swh/loader/core/__init__.py similarity index 100% rename from swh/loader/__init__.py rename to swh/loader/core/__init__.py diff --git a/swh/loader/converters.py b/swh/loader/core/converters.py similarity index 100% rename from swh/loader/converters.py rename to swh/loader/core/converters.py diff --git a/swh/loader/loader.py b/swh/loader/core/loader.py similarity index 100% rename from swh/loader/loader.py rename to swh/loader/core/loader.py diff --git a/swh/loader/queue.py b/swh/loader/core/queue.py similarity index 100% rename from swh/loader/queue.py rename to swh/loader/core/queue.py diff --git a/swh/loader/tasks.py b/swh/loader/core/tasks.py similarity index 100% rename from swh/loader/tasks.py rename to swh/loader/core/tasks.py diff --git a/swh/loader/tests/test_converters.py b/swh/loader/core/tests/test_converters.py similarity index 99% rename from swh/loader/tests/test_converters.py rename to swh/loader/core/tests/test_converters.py index e17fee4..b62caea 100644 --- a/swh/loader/tests/test_converters.py +++ b/swh/loader/core/tests/test_converters.py @@ -1,410 +1,410 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import tempfile import unittest from nose.tools import istest -from swh.loader import converters +from swh.loader.core import converters from swh.model import git def tmpfile_with_content(fromdir, contentfile): """Create a temporary file with content contentfile in directory fromdir. """ tmpfilepath = tempfile.mktemp( suffix='.swh', prefix='tmp-file-for-test', dir=fromdir) with open(tmpfilepath, 'wb') as f: f.write(contentfile) return tmpfilepath class TestConverters(unittest.TestCase): @classmethod def setUpClass(cls): super().setUpClass() cls.tmpdir = tempfile.mkdtemp(prefix='test-swh-loader-dir.') @classmethod def tearDownClass(cls): shutil.rmtree(cls.tmpdir) super().tearDownClass() @istest def format_to_minutes(self): self.assertEquals(converters.format_to_minutes('+0100'), 60) self.assertEquals(converters.format_to_minutes('-0200'), -120) self.assertEquals(converters.format_to_minutes('+1250'), 12*60+50) self.assertEquals(converters.format_to_minutes('+0000'), 0) self.assertEquals(converters.format_to_minutes('-0000'), 0) @istest def annotated_tag_to_release(self): # given release = { 'id': '123', 'target': '456', 'target_type': 'revision', 'name': 'some-release', 'comment': 'some-comment-on-release', 'date': 1444054085, 'offset': '-0300', 'author_name': 'someone', 'author_email': 'someone@whatelse.eu', } expected_release = { 'target': '456', 'target_type': 'revision', 'name': b'some-release', 'message': b'some-comment-on-release', 'date': { 'timestamp': 1444054085, 'offset': -180 }, 'author': { 'name': b'someone', 'email': b'someone@whatelse.eu', }, 'synthetic': True, } # when actual_release = converters.annotated_tag_to_release(release) # then self.assertDictEqual(actual_release, expected_release) @istest def blob_to_content_visible_data(self): # given contentfile = b'temp file for testing blob to content conversion' tmpfilepath = tmpfile_with_content(self.tmpdir, contentfile) obj = { 'path': tmpfilepath, 'perms': git.GitPerm.BLOB, 'type': git.GitType.BLOB, 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', } expected_blob = { 'data': contentfile, 'length': len(contentfile), 'status': 'visible', 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', 'perms': git.GitPerm.BLOB.value, 'type': git.GitType.BLOB.value, } # when actual_blob = converters.blob_to_content(obj) # then self.assertEqual(actual_blob, expected_blob) @istest def blob_to_content_link(self): # given contentfile = b'temp file for testing blob to content conversion' tmpfilepath = tmpfile_with_content(self.tmpdir, contentfile) tmplinkpath = tempfile.mktemp(dir=self.tmpdir) os.symlink(tmpfilepath, tmplinkpath) obj = { 'path': tmplinkpath, 'perms': git.GitPerm.BLOB, 'type': git.GitType.BLOB, 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', } expected_blob = { 'data': contentfile, 'length': len(tmpfilepath), 'status': 'visible', 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', 'perms': git.GitPerm.BLOB.value, 'type': git.GitType.BLOB.value, } # when actual_blob = converters.blob_to_content(obj) # then self.assertEqual(actual_blob, expected_blob) @istest def blob_to_content_link_with_data_length_populated(self): # given tmplinkpath = tempfile.mktemp(dir=self.tmpdir) obj = { 'length': 10, # wrong for test purposes 'data': 'something wrong', # again for test purposes 'path': tmplinkpath, 'perms': git.GitPerm.BLOB, 'type': git.GitType.BLOB, 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', } expected_blob = { 'length': 10, 'data': 'something wrong', 'status': 'visible', 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', 'perms': git.GitPerm.BLOB.value, 'type': git.GitType.BLOB.value, } # when actual_blob = converters.blob_to_content(obj) # then self.assertEqual(actual_blob, expected_blob) @istest def blob_to_content2_absent_data(self): # given contentfile = b'temp file for testing blob to content conversion' tmpfilepath = tmpfile_with_content(self.tmpdir, contentfile) obj = { 'path': tmpfilepath, 'perms': git.GitPerm.BLOB, 'type': git.GitType.BLOB, 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', } expected_blob = { 'length': len(contentfile), 'status': 'absent', 'sha1': 'some-sha1', 'sha256': 'some-sha256', 'sha1_git': 'some-sha1git', 'perms': git.GitPerm.BLOB.value, 'type': git.GitType.BLOB.value, 'reason': 'Content too large', 'origin': 190 } # when actual_blob = converters.blob_to_content(obj, None, max_content_size=10, origin_id=190) # then self.assertEqual(actual_blob, expected_blob) @istest def tree_to_directory_no_entries(self): # given tree = { 'path': 'foo', 'sha1_git': b'tree_sha1_git' } objects = { 'foo': [{'type': git.GitType.TREE, 'perms': git.GitPerm.TREE, 'name': 'bar', 'sha1_git': b'sha1-target'}, {'type': git.GitType.BLOB, 'perms': git.GitPerm.BLOB, 'name': 'file-foo', 'sha1_git': b'file-foo-sha1-target'}] } expected_directory = { 'id': b'tree_sha1_git', 'entries': [{'type': 'dir', 'perms': int(git.GitPerm.TREE.value), 'name': 'bar', 'target': b'sha1-target'}, {'type': 'file', 'perms': int(git.GitPerm.BLOB.value), 'name': 'file-foo', 'target': b'file-foo-sha1-target'}] } # when actual_directory = converters.tree_to_directory(tree, objects) # then self.assertEqual(actual_directory, expected_directory) @istest def commit_to_revision(self): # given commit = { 'sha1_git': 'commit-git-sha1', 'author_date': 1444054085, 'author_offset': '+0000', 'committer_date': 1444054085, 'committer_offset': '-0000', 'type': 'tar', 'message': 'synthetic-message-input', 'author_name': 'author-name', 'author_email': 'author-email', 'committer_name': 'committer-name', 'committer_email': 'committer-email', 'metadata': {'checksums': {'sha1': b'sha1-as-bytes'}}, 'directory': 'targeted-tree-sha1', } objects = { git.ROOT_TREE_KEY: [{'sha1_git': 'targeted-tree-sha1'}] } expected_revision = { 'date': { 'timestamp': 1444054085, 'offset': 0, }, 'committer_date': { 'timestamp': 1444054085, 'offset': 0, }, 'type': 'tar', 'directory': 'targeted-tree-sha1', 'message': b'synthetic-message-input', 'author': { 'name': b'author-name', 'email': b'author-email', }, 'committer': { 'name': b'committer-name', 'email': b'committer-email', }, 'synthetic': True, 'metadata': {'checksums': {'sha1': b'sha1-as-bytes'}}, 'parents': [], } # when actual_revision = converters.commit_to_revision(commit, objects) # then self.assertEquals(actual_revision, expected_revision) @istest def ref_to_occurrence_1(self): # when actual_occ = converters.ref_to_occurrence({ 'id': 'some-id', 'branch': 'some/branch' }) # then self.assertEquals(actual_occ, { 'id': 'some-id', 'branch': b'some/branch' }) @istest def ref_to_occurrence_2(self): # when actual_occ = converters.ref_to_occurrence({ 'id': 'some-id', 'branch': b'some/branch' }) # then self.assertEquals(actual_occ, { 'id': 'some-id', 'branch': b'some/branch' }) @istest def shallow_blob(self): # when actual_blob = converters.shallow_blob({ 'length': 1451, 'sha1_git': b'\xd1\xdd\x9a@\xeb\xf6!\x99\xd4[S\x05\xa8Y\xa3\x80\xa7\xb1;\x9c', 'name': b'LDPCL', 'type': b'blob', 'sha256': b'\xe6it!\x99\xb37UT\x8f\x0e\x8f\xd7o\x92"\xce\xa3\x1d\xd2\xe5D>M\xaaj/\x03\x138\xad\x1b', # noqa 'perms': b'100644', 'sha1': b'.\x18Y\xd6M\x8c\x9a\xa4\xe1\xf1\xc7\x95\x082\xcf\xc9\xd8\nV)', 'path': b'/tmp/tmp.c86tq5o9.swh.loader/pkg-doc-linux/copyrights/non-free/LDPCL' # noqa }) # then self.assertEqual(actual_blob, { 'sha1': b'.\x18Y\xd6M\x8c\x9a\xa4\xe1\xf1\xc7\x95\x082\xcf\xc9\xd8\nV)', 'sha1_git': b'\xd1\xdd\x9a@\xeb\xf6!\x99\xd4[S\x05\xa8Y\xa3\x80\xa7\xb1;\x9c', 'sha256': b'\xe6it!\x99\xb37UT\x8f\x0e\x8f\xd7o\x92"\xce\xa3\x1d\xd2\xe5D>M\xaaj/\x03\x138\xad\x1b', # noqa 'length': 1451, }) @istest def shallow_tree(self): # when actual_shallow_tree = converters.shallow_tree({ 'length': 1451, 'sha1_git': b'tree-id', 'type': b'tree', 'sha256': b'\xe6it!\x99\xb37UT\x8f\x0e\x8f\xd7o\x92"\xce\xa3\x1d\xd2\xe5D>M\xaaj/\x03\x138\xad\x1b', # noqa 'perms': b'100644', 'sha1': b'.\x18Y\xd6M\x8c\x9a\xa4\xe1\xf1\xc7\x95\x082\xcf\xc9\xd8\nV)', }) # then self.assertEqual(actual_shallow_tree, b'tree-id') @istest def shallow_commit(self): # when actual_shallow_commit = converters.shallow_commit({ 'sha1_git': b'\xd1\xdd\x9a@\xeb\xf6!\x99\xd4[S\x05\xa8Y\xa3\x80\xa7\xb1;\x9c', 'type': b'commit', 'id': b'let-me-see-some-id', }) # then self.assertEqual(actual_shallow_commit, b'let-me-see-some-id') @istest def shallow_tag(self): # when actual_shallow_tag = converters.shallow_tag({ 'sha1': b'\xd1\xdd\x9a@\xeb\xf6!\x99\xd4[S\x05\xa8Y\xa3\x80\xa7\xb1;\x9c', 'type': b'tag', 'id': b'this-is-not-the-id-you-are-looking-for', }) # then self.assertEqual(actual_shallow_tag, b'this-is-not-the-id-you-are-looking-for') # noqa diff --git a/swh/loader/tests/test_queue.py b/swh/loader/core/tests/test_queue.py similarity index 96% rename from swh/loader/tests/test_queue.py rename to swh/loader/core/tests/test_queue.py index 1706a58..c036868 100644 --- a/swh/loader/tests/test_queue.py +++ b/swh/loader/core/tests/test_queue.py @@ -1,141 +1,141 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest -from swh.loader.queue import QueuePerNbElements -from swh.loader.queue import QueuePerNbUniqueElements -from swh.loader.queue import QueuePerSizeAndNbUniqueElements +from swh.loader.core.queue import QueuePerNbElements +from swh.loader.core.queue import QueuePerNbUniqueElements +from swh.loader.core.queue import QueuePerSizeAndNbUniqueElements class TestQueuePerNbElements(unittest.TestCase): @istest def simple_queue_behavior(self): max_nb_elements = 10 queue = QueuePerNbElements(max_nb_elements=max_nb_elements) elements = [1, 3, 4, 9, 20, 30, 40] actual_threshold = queue.add(elements) self.assertFalse(actual_threshold, len(elements) > max_nb_elements) # pop returns the content and reset the queue actual_elements = queue.pop() self.assertEquals(actual_elements, elements) self.assertEquals(queue.pop(), []) # duplicates can be integrated new_elements = [1, 1, 3, 4, 9, 20, 30, 40, 12, 14, 2] actual_threshold = queue.add(new_elements) self.assertTrue(actual_threshold) self.assertEquals(queue.pop(), new_elements) # reset is destructive too queue.add(new_elements) queue.reset() self.assertEquals(queue.pop(), []) def to_some_objects(elements, key): for elt in elements: yield {key: elt} class TestQueuePerNbUniqueElements(unittest.TestCase): @istest def queue_with_unique_key_behavior(self): max_nb_elements = 5 queue = QueuePerNbUniqueElements(max_nb_elements=max_nb_elements, key='id') # no duplicates elements = list(to_some_objects([1, 1, 3, 4, 9], key='id')) actual_threshold = queue.add(elements) self.assertFalse(actual_threshold, len(elements) > max_nb_elements) # pop returns the content and reset the queue actual_elements = queue.pop() self.assertEquals(actual_elements, [{'id': 1}, {'id': 3}, {'id': 4}, {'id': 9}]) self.assertEquals(queue.pop(), []) new_elements = list(to_some_objects( [1, 3, 4, 9, 20], key='id')) actual_threshold = queue.add(new_elements) self.assertTrue(actual_threshold) # reset is destructive too queue.add(new_elements) queue.reset() self.assertEquals(queue.pop(), []) def to_some_complex_objects(elements, key): for elt, size in elements: yield {key: elt, 'length': size} class TestQueuePerSizeAndNbUniqueElements(unittest.TestCase): @istest def queue_with_unique_key_and_size_behavior(self): max_nb_elements = 5 max_size = 100 queue = QueuePerSizeAndNbUniqueElements( max_nb_elements=max_nb_elements, max_size=max_size, key='k') # size total exceeded, nb elements not reached, still the # threshold is deemed reached elements = list(to_some_complex_objects([(1, 10), (2, 20), (3, 30), (4, 100)], key='k')) actual_threshold = queue.add(elements) self.assertTrue(actual_threshold) # pop returns the content and reset the queue actual_elements = queue.pop() self.assertEquals(actual_elements, [{'k': 1, 'length': 10}, {'k': 2, 'length': 20}, {'k': 3, 'length': 30}, {'k': 4, 'length': 100}]) self.assertEquals(queue.pop(), []) # size threshold not reached, nb elements reached, the # threshold is considered reached new_elements = list(to_some_complex_objects( [(1, 10), (3, 5), (4, 2), (9, 1), (20, 0)], key='k')) actual_threshold = queue.add(new_elements) queue.reset() self.assertTrue(actual_threshold) # nb elements threshold not reached, nor the top number of # elements, the threshold is not reached new_elements = list(to_some_complex_objects( [(1, 10)], key='k')) actual_threshold = queue.add(new_elements) self.assertFalse(actual_threshold) # reset is destructive too queue.add(new_elements) queue.reset() self.assertEquals(queue.pop(), [])