Page MenuHomeSoftware Heritage

D7071.id25801.diff
No OneTemporary

D7071.id25801.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,4 +2,4 @@
swh.model >= 3.0.0
swh.objstorage >= 0.0.17
swh.scheduler >= 0.7.0
-swh.storage >= 0.29.0
+swh.storage >= 0.43.1
diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -422,7 +422,9 @@
# swh-graph, fall back to self.storage.revision_log.
# self.storage.revision_log also gives us the full revisions,
# so we load them right now instead of just pushing them on the stack.
- walker = DFSRevisionsWalker(self.storage, obj_id, state=self._walker_state)
+ walker = DFSRevisionsWalker(
+ self.storage, obj_id, state=self._walker_state, ignore_displayname=True
+ )
for revision in walker:
self.write_revision_node(Revision.from_dict(revision))
self.nb_loaded += 1
@@ -523,7 +525,9 @@
def load_revisions(self, obj_ids: List[Sha1Git]) -> None:
"""Given a list of revision ids, loads these revisions and their directories;
but not their parent revisions (ie. this is not recursive)."""
- ret: List[Optional[Revision]] = self.storage.revision_get(obj_ids)
+ ret: List[Optional[Revision]] = self.storage.revision_get(
+ obj_ids, ignore_displayname=True
+ )
revisions: List[Revision] = list(filter(None, ret))
if len(ret) != len(revisions):
@@ -540,7 +544,7 @@
def load_releases(self, obj_ids: List[Sha1Git]) -> List[Release]:
"""Loads release objects, and returns them."""
- ret = self.storage.release_get(obj_ids)
+ ret = self.storage.release_get(obj_ids, ignore_displayname=True)
releases = list(filter(None, ret))
if len(ret) != len(releases):
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py
--- a/swh/vault/tests/test_git_bare_cooker.py
+++ b/swh/vault/tests/test_git_bare_cooker.py
@@ -17,6 +17,7 @@
import unittest.mock
import attr
+import dulwich.repo
import pytest
from pytest import param
@@ -33,6 +34,7 @@
Snapshot,
SnapshotBranch,
TargetType,
+ Timestamp,
TimestampWithTimezone,
)
from swh.vault.cookers.git_bare import GitBareCooker
@@ -432,3 +434,134 @@
)
assert output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n"
+
+
+@pytest.mark.parametrize(
+ "use_graph",
+ [
+ pytest.param(False, id="without-graph"),
+ pytest.param(True, id="with-graph", marks=pytest.mark.graph),
+ ],
+)
+def test_ignore_displayname(swh_storage, use_graph):
+ """Tests that the original authorship information is used instead of
+ configured display names; otherwise objects would not match their hash,
+ and git-fsck/git-clone would fail.
+
+ This tests both with and without swh-graph, as both configurations use different
+ code paths to fetch revisions.
+ """
+
+ date = TimestampWithTimezone.from_numeric_offset(Timestamp(1643882820, 0), 0, False)
+ legacy_person = Person.from_fullname(b"old me <old@example.org>")
+ current_person = Person.from_fullname(b"me <me@example.org>")
+
+ content = Content.from_data(b"foo")
+ swh_storage.content_add([content])
+
+ directory = Directory(
+ entries=(
+ DirectoryEntry(
+ name=b"file1", type="file", perms=0o100644, target=content.sha1_git
+ ),
+ ),
+ )
+ swh_storage.directory_add([directory])
+
+ revision = Revision(
+ message=b"rev",
+ author=legacy_person,
+ date=date,
+ committer=legacy_person,
+ committer_date=date,
+ parents=(),
+ type=RevisionType.GIT,
+ directory=directory.id,
+ synthetic=True,
+ )
+ swh_storage.revision_add([revision])
+
+ release = Release(
+ name=b"v1.1.0",
+ message=None,
+ author=legacy_person,
+ date=date,
+ target=revision.id,
+ target_type=ObjectType.REVISION,
+ synthetic=True,
+ )
+ swh_storage.release_add([release])
+
+ snapshot = Snapshot(
+ branches={
+ b"refs/tags/v1.1.0": SnapshotBranch(
+ target=release.id, target_type=TargetType.RELEASE
+ ),
+ b"HEAD": SnapshotBranch(
+ target=revision.id, target_type=TargetType.REVISION
+ ),
+ }
+ )
+ swh_storage.snapshot_add([snapshot])
+
+ # Add all objects to graph
+ if use_graph:
+ from swh.graph.naive_client import NaiveClient as GraphClient
+
+ nodes = [
+ str(x.swhid()) for x in [content, directory, revision, release, snapshot]
+ ]
+ edges = [
+ (str(x.swhid()), str(y.swhid()))
+ for (x, y) in [
+ (directory, content),
+ (revision, directory),
+ (release, revision),
+ (snapshot, release),
+ (snapshot, revision),
+ ]
+ ]
+ swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges))
+ else:
+ swh_graph = None
+
+ # Set a display name
+ with swh_storage.db() as db:
+ with db.transaction() as cur:
+ cur.execute(
+ "UPDATE person set displayname = %s where fullname = %s",
+ (current_person.fullname, legacy_person.fullname),
+ )
+
+ # Check the display name did apply in the storage
+ assert swh_storage.revision_get([revision.id])[0] == attr.evolve(
+ revision, author=current_person, committer=current_person,
+ )
+
+ # Cook
+ cooked_swhid = snapshot.swhid()
+ backend = InMemoryVaultBackend()
+ cooker = GitBareCooker(
+ cooked_swhid, backend=backend, storage=swh_storage, graph=swh_graph,
+ )
+
+ cooker.cook()
+
+ # Get bundle
+ bundle = backend.fetch("git_bare", cooked_swhid)
+
+ # Extract bundle and make sure the release and the revision are in it
+ with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir:
+ with tarfile.open(fileobj=io.BytesIO(bundle)) as tf:
+ tf.extractall(tempdir)
+
+ # If we are here, it means git-fsck succeeded when called by cooker.cook(),
+ # so we already know the original person was used. Let's double-check.
+
+ repo = dulwich.repo.Repo(f"{tempdir}/{cooked_swhid}.git")
+
+ tag = repo[b"refs/tags/v1.1.0"]
+ assert tag.tagger == legacy_person.fullname
+
+ commit = repo[tag.object[1]]
+ assert commit.author == legacy_person.fullname

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 12:13 AM (19 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221497

Event Timeline