Page Menu · Home · Software Heritage

No One · Temporary

diff --git a/swh/loader/git/tests/test_from_disk.py b/swh/loader/git/tests/test_from_disk.py
index 00a7ecf..96b74b3 100644
--- a/swh/loader/git/tests/test_from_disk.py
+++ b/swh/loader/git/tests/test_from_disk.py
@@ -1,298 +1,322 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os.path
-import subprocess
+import dulwich.repo
from swh.loader.git.from_disk import GitLoaderFromDisk as OrigGitLoaderFromDisk
from swh.loader.git.from_disk import GitLoaderFromArchive as OrigGitLoaderFromArchive
from swh.loader.core.tests import BaseLoaderTest
from swh.model.hashutil import hash_to_bytes
from . import TEST_LOADER_CONFIG
class GitLoaderFromArchive(OrigGitLoaderFromArchive):
    """Archive loader variant used by the tests in this module.

    It pins the project name and the configuration so that neither depends
    on the archive location nor on a configuration file.
    """

    def project_name_from_archive(self, archive_path):
        """Return the fixed project name, ignoring *archive_path*."""
        # Deriving the name from the path would yield 'resources', which is
        # not what the tests want.
        project_name = "testrepo"
        return project_name

    def parse_config_file(self, *args, **kwargs):
        """Return ``TEST_LOADER_CONFIG`` whatever the arguments are."""
        config = TEST_LOADER_CONFIG
        return config
# Hex identifiers of the contents the tests expect to find in storage after
# loading the test repository; each entry is tagged with the file it is for.
CONTENT1 = {
    # README v1
    "33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d",
    # README v2
    "349c4ff7d21f1ec0eda26f3d9284c293e3425417",
    # file1.txt
    "799c11e348d39f1704022b8354502e2f81f3c037",
    # file2.txt
    "4bdb40dfd6ec75cb730e678b5d7786e30170c5fb",
}
# Identifier of the snapshot expected after loading the test repository.
SNAPSHOT_ID = "a23699280a82a043f8c0994cf1631b568f716f95"

# Expected snapshot: HEAD is an alias of master; every other branch and tag
# points directly at a revision.
SNAPSHOT1 = {
    "id": SNAPSHOT_ID,
    "branches": {
        "HEAD": {
            "target": "refs/heads/master",
            "target_type": "alias",
        },
        "refs/heads/master": {
            "target": "2f01f5ca7e391a2f08905990277faf81e709a649",
            "target_type": "revision",
        },
        "refs/heads/branch1": {
            "target": "b0a77609903f767a2fd3d769904ef9ef68468b87",
            "target_type": "revision",
        },
        "refs/heads/branch2": {
            "target": "bd746cd1913721b269b395a56a97baf6755151c2",
            "target_type": "revision",
        },
        "refs/tags/branch2-after-delete": {
            "target": "bd746cd1913721b269b395a56a97baf6755151c2",
            "target_type": "revision",
        },
        "refs/tags/branch2-before-delete": {
            "target": "1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b",
            "target_type": "revision",
        },
    },
}
# Maps each revision id to the hash of its root directory.
#
# The directory hashes were obtained by running, for every revision id
# listed below (in this order):
#
#   gco <revision id>
#   swh-hashtree --ignore '.git' --path .
#
# NOTE(review): `gco` is presumably a shell alias for `git checkout`.
REVISIONS1 = {
    "b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a":
        "40dbdf55dfd4065422462cc74a949254aefa972e",
    "2f01f5ca7e391a2f08905990277faf81e709a649":
        "e1d0d894835f91a0f887a4bc8b16f81feefdfbd5",
    "bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777":
        "b43724545b4759244bb54be053c690649161411c",
    "1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b":
        "fbf70528223d263661b5ad4b80f26caf3860eb8e",
    "79f65ac75f79dda6ff03d66e1242702ab67fb51c":
        "5df34ec74d6f69072d9a0a6677d8efbed9b12e60",
    "b0a77609903f767a2fd3d769904ef9ef68468b87":
        "9ca0c7d6ffa3f9f0de59fd7912e08f11308a1338",
    "bd746cd1913721b269b395a56a97baf6755151c2":
        "e1d0d894835f91a0f887a4bc8b16f81feefdfbd5",
}
class BaseGitLoaderFromDiskTest(BaseLoaderTest):
    """Common ``setUp`` plumbing for the directory and archive loader tests.

    Subclasses call ``setUp`` with the archive to use and whether it must be
    uncompressed; everything else is fixed here.
    """

    def setUp(self, archive_name, uncompress_archive, filename="testrepo"):
        """Forward to ``BaseLoaderTest.setUp`` with this package's defaults.

        Args:
            archive_name: name of the archive holding the test repository.
            uncompress_archive: passed through unchanged to the base setUp.
            filename: passed through unchanged; defaults to ``"testrepo"``.
        """
        # Resources are looked up relative to this test module.
        tests_dir = os.path.dirname(__file__)
        super().setUp(
            archive_name=archive_name,
            filename=filename,
            prefix_tmp_folder_name="swh.loader.git.",
            start_path=tests_dir,
            uncompress_archive=uncompress_archive,
        )
class GitLoaderFromDiskTest(OrigGitLoaderFromDisk):
    """``GitLoaderFromDisk`` subclass whose configuration is always
    ``TEST_LOADER_CONFIG`` (a loader, not a test case, despite its name)."""

    def parse_config_file(self, *args, **kwargs):
        """Return ``TEST_LOADER_CONFIG`` whatever the arguments are."""
        config = TEST_LOADER_CONFIG
        return config
# Mixin that builds a GitLoaderFromDiskTest on top of the uncompressed
# testrepo.tgz archive and exposes it through load().
class BaseDirGitLoaderFromDiskTest(BaseGitLoaderFromDiskTest):
"""Mixin base loader test to prepare the git
repository to uncompress, load and test the results.
This sets up
"""
# NOTE(review): the docstring's last sentence ("This sets up") is cut short.
def setUp(self):
# Ask the base setUp to uncompress testrepo.tgz.
super().setUp("testrepo.tgz", uncompress_archive=True)
# repo_url and destination_path are presumably set by the base setUp;
# verify against BaseLoaderTest.
self.loader = GitLoaderFromDiskTest(
url=self.repo_url,
visit_date="2016-05-03 15:16:32+00",
directory=self.destination_path,
)
self.storage = self.loader.storage
# Added by this patch: a dulwich handle on the same directory the loader
# reads, used by tests to modify the repository.
+ self.repo = dulwich.repo.Repo(self.destination_path)
# Run the loader and return its result.
def load(self):
return self.loader.load()
class BaseGitLoaderFromArchiveTest(BaseGitLoaderFromDiskTest):
    """Mixin that prepares a ``GitLoaderFromArchive`` for ``testrepo.tgz``.

    Unlike the directory variant, the archive is not uncompressed by
    ``setUp``; its path is handed to the loader as ``archive_path``.
    """

    def setUp(self):
        """Create the loader and expose it and its storage on the test."""
        super().setUp("testrepo.tgz", uncompress_archive=False)
        # repo_url and destination_path are presumably set by the base
        # setUp; verify against BaseLoaderTest.
        loader = GitLoaderFromArchive(
            url=self.repo_url,
            visit_date="2016-05-03 15:16:32+00",
            archive_path=self.destination_path,
        )
        self.loader = loader
        self.storage = loader.storage

    def load(self):
        """Run the loader and return its result."""
        result = self.loader.load()
        return result
# Test mixin: relies on the concrete test class to provide load(), loader,
# storage, repo_url and the assert* helpers used below.
class GitLoaderFromDiskTests:
"""Common tests for all git loaders."""
def test_load(self):
"""Loads a simple repository (made available by `setUp()`),
and checks everything was added in the storage."""
res = self.load()
self.assertEqual(res["status"], "eventful", res)
self.assertContentsContain(CONTENT1)
self.assertCountDirectories(7)
# This patch only rewords the FIXME; the expected release count stays 0.
- self.assertCountReleases(0) # FIXME: why not 2?
+ self.assertCountReleases(0) # FIXME: should be 2 after T2059
self.assertCountRevisions(7)
self.assertCountSnapshots(1)
self.assertRevisionsContain(REVISIONS1)
self.assertSnapshotEqual(SNAPSHOT1)
self.assertEqual(self.loader.load_status(), {"status": "eventful"})
self.assertEqual(self.loader.visit_status(), "full")
# The latest visit must reference SNAPSHOT1 and be marked full.
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertEqual(visit["snapshot"], hash_to_bytes(SNAPSHOT1["id"]))
self.assertEqual(visit["status"], "full")
def test_load_unchanged(self):
"""Checks loading a repository a second time does not add
any extra data."""
# First load: expected to be eventful and to produce SNAPSHOT1.
res = self.load()
self.assertEqual(res["status"], "eventful")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertEqual(visit["snapshot"], hash_to_bytes(SNAPSHOT1["id"]))
self.assertEqual(visit["status"], "full")
# Second load of the same repository: uneventful, still one snapshot,
# and the latest visit still references SNAPSHOT1.
res = self.load()
self.assertEqual(res["status"], "uneventful")
self.assertCountSnapshots(1)
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertEqual(visit["snapshot"], hash_to_bytes(SNAPSHOT1["id"]))
self.assertEqual(visit["status"], "full")
# NOTE(review): this class is shown as a unified diff; lines starting with
# '-' are removed by the patch and lines starting with '+' are added. The
# patch replaces the subprocess-based `git` calls with calls on self.repo.
class DirGitLoaderTest(BaseDirGitLoaderFromDiskTest, GitLoaderFromDiskTests):
"""Tests for the GitLoaderFromDisk. Includes the common ones, and
add others that only work with a local dir."""
- def _git(self, *cmd):
- """Small wrapper around subprocess to call Git."""
- try:
- return subprocess.check_output(
- ["git", "-C", self.destination_path] + list(cmd)
- )
- except subprocess.CalledProcessError as e:
- print(e.output)
- print(e.stderr)
- raise
-
def test_load_changed(self):
"""Loads a repository, makes some changes by adding files, commits,
and merges, load it again, and check the storage contains everything
it should."""
# Initial load
res = self.load()
self.assertEqual(res["status"], "eventful", res)
- self._git("config", "--local", "user.email", "you@example.com")
- self._git("config", "--local", "user.name", "Your Name")
-
# Load with a new file + revision
with open(os.path.join(self.destination_path, "hello.py"), "a") as fd:
fd.write("print('Hello world')\n")
- self._git("add", "hello.py")
- self._git("commit", "-m", "Hello world")
- new_revision = self._git("rev-parse", "master").decode().strip()
# New version: stage and commit hello.py through self.repo, then check the
# commit's tree id against the expected directory hash before loading.
+ self.repo.stage([b"hello.py"])
+ new_revision = self.repo.do_commit(b"Hello world\n").decode()
+ new_dir = "85dae072a5aa9923ffa7a7568f819ff21bf49858"
+
+ assert self.repo[new_revision.encode()].tree == new_dir.encode()
revisions = REVISIONS1.copy()
assert new_revision not in revisions
- revisions[new_revision] = "85dae072a5aa9923ffa7a7568f819ff21bf49858"
+ revisions[new_revision] = new_dir
res = self.load()
self.assertEqual(res["status"], "eventful")
# One more content, directory, revision and snapshot than the initial load.
self.assertCountContents(4 + 1)
self.assertCountDirectories(7 + 1)
- self.assertCountReleases(0) # FIXME: why not 2?
+ self.assertCountReleases(0) # FIXME: should be 2 after T2059
self.assertCountRevisions(7 + 1)
self.assertCountSnapshots(1 + 1)
self.assertRevisionsContain(revisions)
- # TODO: how to check the snapshot id?
- # self.assertSnapshotEqual(SNAPSHOT1)
-
self.assertEqual(self.loader.load_status(), {"status": "eventful"})
self.assertEqual(self.loader.visit_status(), "full")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertIsNotNone(visit["snapshot"])
self.assertEqual(visit["status"], "full")
- # Load with a new merge
- self._git("merge", "branch1", "-m", "merge")
- new_revision = self._git("rev-parse", "master").decode().strip()
# New version: instead of comparing a snapshot id, fetch the snapshot of the
# latest visit and check that HEAD aliases master and that master targets
# the new commit.
+ snapshot_id = visit["snapshot"]
+ snapshot = self.storage.snapshot_get(snapshot_id)
+ branches = snapshot["branches"]
+ assert branches[b"HEAD"] == {
+ "target": b"refs/heads/master",
+ "target_type": "alias",
+ }
+ assert branches[b"refs/heads/master"] == {
+ "target": hash_to_bytes(new_revision),
+ "target_type": "revision",
+ }
- assert new_revision not in revisions
- revisions[new_revision] = "dab8a37df8db8666d4e277bef9a546f585b5bedd"
+ # Merge branch1 into HEAD.
+
+ current = self.repo[b"HEAD"]
+ branch1 = self.repo[b"refs/heads/branch1"]
+
# The merged tree is built by adding every entry of HEAD's tree and then
# every entry of branch1's tree to a new Tree object.
# NOTE(review): dulwich.objects is used here while the visible imports only
# contain `import dulwich.repo` — presumably resolved transitively; confirm.
+ merged_tree = dulwich.objects.Tree()
+ for item in self.repo[current.tree].items():
+ merged_tree.add(*item)
+ for item in self.repo[branch1.tree].items():
+ merged_tree.add(*item)
+
+ merged_dir_id = "dab8a37df8db8666d4e277bef9a546f585b5bedd"
+ assert merged_tree.id.decode() == merged_dir_id
+ self.repo.object_store.add_object(merged_tree)
+
+ merge_commit = self.repo.do_commit(
+ b"merge.\n", tree=merged_tree.id, merge_heads=[branch1.id]
+ )
+
+ assert merge_commit.decode() not in revisions
+ revisions[merge_commit.decode()] = merged_tree.id.decode()
res = self.load()
self.assertEqual(res["status"], "eventful")
# The merge adds one directory, one revision and one snapshot, but no content.
self.assertCountContents(4 + 1)
self.assertCountDirectories(7 + 2)
- self.assertCountReleases(0) # FIXME: why not 2?
+ self.assertCountReleases(0) # FIXME: should be 2 after T2059
self.assertCountRevisions(7 + 2)
self.assertCountSnapshots(1 + 1 + 1)
self.assertRevisionsContain(revisions)
- # TODO: how to check the snapshot id?
- # self.assertSnapshotEqual(SNAPSHOT1)
-
self.assertEqual(self.loader.load_status(), {"status": "eventful"})
self.assertEqual(self.loader.visit_status(), "full")
visit = self.storage.origin_visit_get_latest(self.repo_url)
self.assertIsNotNone(visit["snapshot"])
self.assertEqual(visit["status"], "full")
# New version: the post-merge snapshot must differ from the previous one and
# its master branch must target the merge commit.
+ merge_snapshot_id = visit["snapshot"]
+ assert merge_snapshot_id != snapshot_id
+
+ merge_snapshot = self.storage.snapshot_get(merge_snapshot_id)
+ merge_branches = merge_snapshot["branches"]
+ assert merge_branches[b"HEAD"] == {
+ "target": b"refs/heads/master",
+ "target_type": "alias",
+ }
+ assert merge_branches[b"refs/heads/master"] == {
+ "target": hash_to_bytes(merge_commit.decode()),
+ "target_type": "revision",
+ }
+
class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderFromDiskTests):
    """Runs the shared ``GitLoaderFromDiskTests`` suite against the
    archive-based setup; it adds no tests of its own."""

File Metadata

Mime Type
text/x-diff
Expires
Thu, Sep 18, 4:52 PM (1 d, 16 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3327591

Event Timeline