Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F11023609
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
View Options
diff --git a/swh/loader/git/tests/test_from_disk.py b/swh/loader/git/tests/test_from_disk.py
index 00a7ecf..96b74b3 100644
--- a/swh/loader/git/tests/test_from_disk.py
+++ b/swh/loader/git/tests/test_from_disk.py
@@ -1,298 +1,322 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os.path
-import subprocess
+import dulwich.repo
from swh.loader.git.from_disk import GitLoaderFromDisk as OrigGitLoaderFromDisk
from swh.loader.git.from_disk import GitLoaderFromArchive as OrigGitLoaderFromArchive
from swh.loader.core.tests import BaseLoaderTest
from swh.model.hashutil import hash_to_bytes
from . import TEST_LOADER_CONFIG
class GitLoaderFromArchive(OrigGitLoaderFromArchive):
    """Archive-based git loader with overrides suited to the test suite."""

    def project_name_from_archive(self, archive_path):
        """Return a fixed project name, regardless of ``archive_path``."""
        # Pinned on purpose: the project must not end up named 'resources'.
        return "testrepo"

    def parse_config_file(self, *args, **kwargs):
        """Return the test loader configuration, ignoring all arguments."""
        return TEST_LOADER_CONFIG
# Content ids the storage is expected to contain after the test repository
# has been loaded (checked through assertContentsContain in test_load).
# NOTE(review): the hash algorithm is not visible from this file -- confirm.
CONTENT1 = {
"33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d", # README v1
"349c4ff7d21f1ec0eda26f3d9284c293e3425417", # README v2
"799c11e348d39f1704022b8354502e2f81f3c037", # file1.txt
"4bdb40dfd6ec75cb730e678b5d7786e30170c5fb", # file2.txt
}
# Id of the snapshot expected after the first load of the test repository.
SNAPSHOT_ID = "a23699280a82a043f8c0994cf1631b568f716f95"
# Expected snapshot for the first load (compared with assertSnapshotEqual).
# HEAD is an alias of refs/heads/master; every other entry, including the
# two refs/tags/* ones, targets a revision.
SNAPSHOT1 = {
"id": SNAPSHOT_ID,
"branches": {
"HEAD": {"target": "refs/heads/master", "target_type": "alias",},
"refs/heads/master": {
"target": "2f01f5ca7e391a2f08905990277faf81e709a649",
"target_type": "revision",
},
"refs/heads/branch1": {
"target": "b0a77609903f767a2fd3d769904ef9ef68468b87",
"target_type": "revision",
},
"refs/heads/branch2": {
"target": "bd746cd1913721b269b395a56a97baf6755151c2",
"target_type": "revision",
},
# Same target as refs/heads/branch2.
"refs/tags/branch2-after-delete": {
"target": "bd746cd1913721b269b395a56a97baf6755151c2",
"target_type": "revision",
},
"refs/tags/branch2-before-delete": {
"target": "1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b",
"target_type": "revision",
},
},
}
# directory hashes obtained with:
# gco b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a
# swh-hashtree --ignore '.git' --path .
# gco 2f01f5ca7e391a2f08905990277faf81e709a649
# swh-hashtree --ignore '.git' --path .
# gco bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777
# swh-hashtree --ignore '.git' --path .
# gco 1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b
# swh-hashtree --ignore '.git' --path .
# gco 79f65ac75f79dda6ff03d66e1242702ab67fb51c
# swh-hashtree --ignore '.git' --path .
# gco b0a77609903f767a2fd3d769904ef9ef68468b87
# swh-hashtree --ignore '.git' --path .
# gco bd746cd1913721b269b395a56a97baf6755151c2
# swh-hashtree --ignore '.git' --path .
# NOTE(review): "gco" is presumably a shell alias for "git checkout" -- confirm.
#
# Maps each expected revision id to the id of its root directory (tree);
# test_load_changed extends a copy of this mapping with the new commits.
# Two revisions (2f01f5ca... and bd746cd1...) share the same directory id.
REVISIONS1 = {
"b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a": (
"40dbdf55dfd4065422462cc74a949254aefa972e"
),
"2f01f5ca7e391a2f08905990277faf81e709a649": (
"e1d0d894835f91a0f887a4bc8b16f81feefdfbd5"
),
"bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777": (
"b43724545b4759244bb54be053c690649161411c"
),
"1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b": (
"fbf70528223d263661b5ad4b80f26caf3860eb8e"
),
"79f65ac75f79dda6ff03d66e1242702ab67fb51c": (
"5df34ec74d6f69072d9a0a6677d8efbed9b12e60"
),
"b0a77609903f767a2fd3d769904ef9ef68468b87": (
"9ca0c7d6ffa3f9f0de59fd7912e08f11308a1338"
),
"bd746cd1913721b269b395a56a97baf6755151c2": (
"e1d0d894835f91a0f887a4bc8b16f81feefdfbd5"
),
}
class BaseGitLoaderFromDiskTest(BaseLoaderTest):
    """Common fixture base for the git loader tests of this module."""

    def setUp(self, archive_name, uncompress_archive, filename="testrepo"):
        """Delegate to ``BaseLoaderTest.setUp`` with git-specific settings.

        ``archive_name``, ``uncompress_archive`` and ``filename`` are passed
        through unchanged; the temporary folder prefix and the start path
        (the directory holding this test module) are fixed here.
        """
        tests_dir = os.path.dirname(__file__)
        super().setUp(
            archive_name=archive_name,
            uncompress_archive=uncompress_archive,
            filename=filename,
            start_path=tests_dir,
            prefix_tmp_folder_name="swh.loader.git.",
        )
class GitLoaderFromDiskTest(OrigGitLoaderFromDisk):
    """On-disk git loader that always uses the test configuration."""

    def parse_config_file(self, *args, **kwargs):
        """Return the test loader configuration, ignoring all arguments."""
        return TEST_LOADER_CONFIG
class BaseDirGitLoaderFromDiskTest(BaseGitLoaderFromDiskTest):
"""Mixin base loader test to prepare the git
repository to uncompress, load and test the results.
This sets up ``self.loader`` and ``self.storage`` for the tests.
"""
def setUp(self):
super().setUp("testrepo.tgz", uncompress_archive=True)
self.loader = GitLoaderFromDiskTest(
url=self.repo_url,
visit_date="2016-05-03 15:16:32+00",
directory=self.destination_path,
)
self.storage = self.loader.storage
+ self.repo = dulwich.repo.Repo(self.destination_path)
def load(self):
return self.loader.load()
class BaseGitLoaderFromArchiveTest(BaseGitLoaderFromDiskTest):
    """Mixin base loader test working from the repository archive.

    The archive is not uncompressed by the fixture; its path is handed to
    the archive loader. This sets up ``self.loader`` and ``self.storage``.
    """

    def setUp(self):
        """Prepare the archive fixture, then build the loader under test."""
        super().setUp("testrepo.tgz", uncompress_archive=False)
        loader_options = {
            "url": self.repo_url,
            "visit_date": "2016-05-03 15:16:32+00",
            "archive_path": self.destination_path,
        }
        self.loader = GitLoaderFromArchive(**loader_options)
        self.storage = self.loader.storage

    def load(self):
        """Run the loader and return its result."""
        result = self.loader.load()
        return result
class GitLoaderFromDiskTests:
"""Common tests for all git loaders."""
def test_load(self):
"""Loads a simple repository (made available by `setUp()`),
and checks everything was added in the storage."""
res = self.load()
self.assertEqual(res["status"], "eventful", res)
self.assertContentsContain(CONTENT1)
self.assertCountDirectories(7)
- self.assertCountReleases(0) # FIXME: why not 2?
+ self.assertCountReleases(0) # FIXME: should be 2 after T2059
self.assertCountRevisions(7)
self.assertCountSnapshots(1)
self.assertRevisionsContain(REVISIONS1)
self.assertSnapshotEqual(SNAPSHOT1)
self.assertEqual(self.loader.load_status(), {"status": "eventful"})
self.assertEqual(self.loader.visit_status(), "full")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertEqual(visit["snapshot"], hash_to_bytes(SNAPSHOT1["id"]))
self.assertEqual(visit["status"], "full")
def test_load_unchanged(self):
"""Checks loading a repository a second time does not add
any extra data."""
res = self.load()
self.assertEqual(res["status"], "eventful")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertEqual(visit["snapshot"], hash_to_bytes(SNAPSHOT1["id"]))
self.assertEqual(visit["status"], "full")
res = self.load()
self.assertEqual(res["status"], "uneventful")
self.assertCountSnapshots(1)
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertEqual(visit["snapshot"], hash_to_bytes(SNAPSHOT1["id"]))
self.assertEqual(visit["status"], "full")
class DirGitLoaderTest(BaseDirGitLoaderFromDiskTest, GitLoaderFromDiskTests):
"""Tests for the GitLoaderFromDisk. Includes the common ones, and
adds others that only work with a local dir."""
- def _git(self, *cmd):
- """Small wrapper around subprocess to call Git."""
- try:
- return subprocess.check_output(
- ["git", "-C", self.destination_path] + list(cmd)
- )
- except subprocess.CalledProcessError as e:
- print(e.output)
- print(e.stderr)
- raise
-
def test_load_changed(self):
"""Loads a repository, makes some changes by adding files, commits,
and merges, loads it again, and checks the storage contains everything
it should."""
# Initial load
res = self.load()
self.assertEqual(res["status"], "eventful", res)
- self._git("config", "--local", "user.email", "you@example.com")
- self._git("config", "--local", "user.name", "Your Name")
-
# Load with a new file + revision
with open(os.path.join(self.destination_path, "hello.py"), "a") as fd:
fd.write("print('Hello world')\n")
- self._git("add", "hello.py")
- self._git("commit", "-m", "Hello world")
- new_revision = self._git("rev-parse", "master").decode().strip()
+ self.repo.stage([b"hello.py"])
+ new_revision = self.repo.do_commit(b"Hello world\n").decode()
+ new_dir = "85dae072a5aa9923ffa7a7568f819ff21bf49858"
+
+ assert self.repo[new_revision.encode()].tree == new_dir.encode()
revisions = REVISIONS1.copy()
assert new_revision not in revisions
- revisions[new_revision] = "85dae072a5aa9923ffa7a7568f819ff21bf49858"
+ revisions[new_revision] = new_dir
res = self.load()
self.assertEqual(res["status"], "eventful")
self.assertCountContents(4 + 1)
self.assertCountDirectories(7 + 1)
- self.assertCountReleases(0) # FIXME: why not 2?
+ self.assertCountReleases(0) # FIXME: should be 2 after T2059
self.assertCountRevisions(7 + 1)
self.assertCountSnapshots(1 + 1)
self.assertRevisionsContain(revisions)
- # TODO: how to check the snapshot id?
- # self.assertSnapshotEqual(SNAPSHOT1)
-
self.assertEqual(self.loader.load_status(), {"status": "eventful"})
self.assertEqual(self.loader.visit_status(), "full")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertIsNotNone(visit["snapshot"])
self.assertEqual(visit["status"], "full")
- # Load with a new merge
- self._git("merge", "branch1", "-m", "merge")
- new_revision = self._git("rev-parse", "master").decode().strip()
+ snapshot_id = visit["snapshot"]
+ snapshot = self.storage.snapshot_get(snapshot_id)
+ branches = snapshot["branches"]
+ assert branches[b"HEAD"] == {
+ "target": b"refs/heads/master",
+ "target_type": "alias",
+ }
+ assert branches[b"refs/heads/master"] == {
+ "target": hash_to_bytes(new_revision),
+ "target_type": "revision",
+ }
- assert new_revision not in revisions
- revisions[new_revision] = "dab8a37df8db8666d4e277bef9a546f585b5bedd"
+ # Merge branch1 into HEAD.
+
+ current = self.repo[b"HEAD"]
+ branch1 = self.repo[b"refs/heads/branch1"]
+
+ merged_tree = dulwich.objects.Tree()
+ for item in self.repo[current.tree].items():
+ merged_tree.add(*item)
+ for item in self.repo[branch1.tree].items():
+ merged_tree.add(*item)
+
+ merged_dir_id = "dab8a37df8db8666d4e277bef9a546f585b5bedd"
+ assert merged_tree.id.decode() == merged_dir_id
+ self.repo.object_store.add_object(merged_tree)
+
+ merge_commit = self.repo.do_commit(
+ b"merge.\n", tree=merged_tree.id, merge_heads=[branch1.id]
+ )
+
+ assert merge_commit.decode() not in revisions
+ revisions[merge_commit.decode()] = merged_tree.id.decode()
res = self.load()
self.assertEqual(res["status"], "eventful")
self.assertCountContents(4 + 1)
self.assertCountDirectories(7 + 2)
- self.assertCountReleases(0) # FIXME: why not 2?
+ self.assertCountReleases(0) # FIXME: should be 2 after T2059
self.assertCountRevisions(7 + 2)
self.assertCountSnapshots(1 + 1 + 1)
self.assertRevisionsContain(revisions)
- # TODO: how to check the snapshot id?
- # self.assertSnapshotEqual(SNAPSHOT1)
-
self.assertEqual(self.loader.load_status(), {"status": "eventful"})
self.assertEqual(self.loader.visit_status(), "full")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertIsNotNone(visit["snapshot"])
self.assertEqual(visit["status"], "full")
+ merge_snapshot_id = visit["snapshot"]
+ assert merge_snapshot_id != snapshot_id
+
+ merge_snapshot = self.storage.snapshot_get(merge_snapshot_id)
+ merge_branches = merge_snapshot["branches"]
+ assert merge_branches[b"HEAD"] == {
+ "target": b"refs/heads/master",
+ "target_type": "alias",
+ }
+ assert merge_branches[b"refs/heads/master"] == {
+ "target": hash_to_bytes(merge_commit.decode()),
+ "target_type": "revision",
+ }
+
class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderFromDiskTests):
    """Tests for GitLoaderFromArchive.

    Only runs the common tests inherited from GitLoaderFromDiskTests."""
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Sep 18, 4:52 PM (1 d, 16 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3327591
Attached To
rDLDG Git loader
Event Timeline
Log In to Comment