Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066192
D7071.id25801.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D7071.id25801.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,4 +2,4 @@
swh.model >= 3.0.0
swh.objstorage >= 0.0.17
swh.scheduler >= 0.7.0
-swh.storage >= 0.29.0
+swh.storage >= 0.43.1
diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -422,7 +422,9 @@
# swh-graph, fall back to self.storage.revision_log.
# self.storage.revision_log also gives us the full revisions,
# so we load them right now instead of just pushing them on the stack.
- walker = DFSRevisionsWalker(self.storage, obj_id, state=self._walker_state)
+ walker = DFSRevisionsWalker(
+ self.storage, obj_id, state=self._walker_state, ignore_displayname=True
+ )
for revision in walker:
self.write_revision_node(Revision.from_dict(revision))
self.nb_loaded += 1
@@ -523,7 +525,9 @@
def load_revisions(self, obj_ids: List[Sha1Git]) -> None:
"""Given a list of revision ids, loads these revisions and their directories;
but not their parent revisions (ie. this is not recursive)."""
- ret: List[Optional[Revision]] = self.storage.revision_get(obj_ids)
+ ret: List[Optional[Revision]] = self.storage.revision_get(
+ obj_ids, ignore_displayname=True
+ )
revisions: List[Revision] = list(filter(None, ret))
if len(ret) != len(revisions):
@@ -540,7 +544,7 @@
def load_releases(self, obj_ids: List[Sha1Git]) -> List[Release]:
"""Loads release objects, and returns them."""
- ret = self.storage.release_get(obj_ids)
+ ret = self.storage.release_get(obj_ids, ignore_displayname=True)
releases = list(filter(None, ret))
if len(ret) != len(releases):
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py
--- a/swh/vault/tests/test_git_bare_cooker.py
+++ b/swh/vault/tests/test_git_bare_cooker.py
@@ -17,6 +17,7 @@
import unittest.mock
import attr
+import dulwich.repo
import pytest
from pytest import param
@@ -33,6 +34,7 @@
Snapshot,
SnapshotBranch,
TargetType,
+ Timestamp,
TimestampWithTimezone,
)
from swh.vault.cookers.git_bare import GitBareCooker
@@ -432,3 +434,134 @@
)
assert output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n"
+
+
+@pytest.mark.parametrize(
+ "use_graph",
+ [
+ pytest.param(False, id="without-graph"),
+ pytest.param(True, id="with-graph", marks=pytest.mark.graph),
+ ],
+)
+def test_ignore_displayname(swh_storage, use_graph):
+ """Tests the original authorship information is used instead of
+ configured display names; otherwise objects would not match their hash,
+ and git-fsck/git-clone would fail.
+
+ This tests both with and without swh-graph, as both configurations use different
+ code paths to fetch revisions.
+ """
+
+ date = TimestampWithTimezone.from_numeric_offset(Timestamp(1643882820, 0), 0, False)
+ legacy_person = Person.from_fullname(b"old me <old@example.org>")
+ current_person = Person.from_fullname(b"me <me@example.org>")
+
+ content = Content.from_data(b"foo")
+ swh_storage.content_add([content])
+
+ directory = Directory(
+ entries=(
+ DirectoryEntry(
+ name=b"file1", type="file", perms=0o100644, target=content.sha1_git
+ ),
+ ),
+ )
+ swh_storage.directory_add([directory])
+
+ revision = Revision(
+ message=b"rev",
+ author=legacy_person,
+ date=date,
+ committer=legacy_person,
+ committer_date=date,
+ parents=(),
+ type=RevisionType.GIT,
+ directory=directory.id,
+ synthetic=True,
+ )
+ swh_storage.revision_add([revision])
+
+ release = Release(
+ name=b"v1.1.0",
+ message=None,
+ author=legacy_person,
+ date=date,
+ target=revision.id,
+ target_type=ObjectType.REVISION,
+ synthetic=True,
+ )
+ swh_storage.release_add([release])
+
+ snapshot = Snapshot(
+ branches={
+ b"refs/tags/v1.1.0": SnapshotBranch(
+ target=release.id, target_type=TargetType.RELEASE
+ ),
+ b"HEAD": SnapshotBranch(
+ target=revision.id, target_type=TargetType.REVISION
+ ),
+ }
+ )
+ swh_storage.snapshot_add([snapshot])
+
+ # Add all objects to graph
+ if use_graph:
+ from swh.graph.naive_client import NaiveClient as GraphClient
+
+ nodes = [
+ str(x.swhid()) for x in [content, directory, revision, release, snapshot]
+ ]
+ edges = [
+ (str(x.swhid()), str(y.swhid()))
+ for (x, y) in [
+ (directory, content),
+ (revision, directory),
+ (release, revision),
+ (snapshot, release),
+ (snapshot, revision),
+ ]
+ ]
+ swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges))
+ else:
+ swh_graph = None
+
+ # Set a display name
+ with swh_storage.db() as db:
+ with db.transaction() as cur:
+ cur.execute(
+ "UPDATE person set displayname = %s where fullname = %s",
+ (current_person.fullname, legacy_person.fullname),
+ )
+
+ # Check the display name did apply in the storage
+ assert swh_storage.revision_get([revision.id])[0] == attr.evolve(
+ revision, author=current_person, committer=current_person,
+ )
+
+ # Cook
+ cooked_swhid = snapshot.swhid()
+ backend = InMemoryVaultBackend()
+ cooker = GitBareCooker(
+ cooked_swhid, backend=backend, storage=swh_storage, graph=swh_graph,
+ )
+
+ cooker.cook()
+
+ # Get bundle
+ bundle = backend.fetch("git_bare", cooked_swhid)
+
+ # Extract bundle and make sure the original author names are in it
+ with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir:
+ with tarfile.open(fileobj=io.BytesIO(bundle)) as tf:
+ tf.extractall(tempdir)
+
+ # If we are here, it means git-fsck succeeded when called by cooker.cook(),
+ # so we already know the original person was used. Let's double-check.
+
+ repo = dulwich.repo.Repo(f"{tempdir}/{cooked_swhid}.git")
+
+ tag = repo[b"refs/tags/v1.1.0"]
+ assert tag.tagger == legacy_person.fullname
+
+ commit = repo[tag.object[1]]
+ assert commit.author == legacy_person.fullname
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 12:13 AM (19 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221497
Attached To
D7071: git_bare: Use original author names instead of display names
Event Timeline
Log In to Comment