Page MenuHomeSoftware Heritage

D6923.id25233.diff
No OneTemporary

D6923.id25233.diff

diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py
--- a/swh/loader/git/converters.py
+++ b/swh/loader/git/converters.py
@@ -120,6 +120,20 @@
)
dir_ = Directory(id=tree.sha().digest(), entries=tuple(entries),)
+
+ if dir_.compute_hash() != dir_.id:
+ expected_id = dir_.id
+ actual_id = dir_.compute_hash()
+ logger.warning(
+ "Expected directory to have id %s, but got %s. Recording raw_manifest.",
+ hash_to_hex(expected_id),
+ hash_to_hex(actual_id),
+ )
+ raw_string = tree.as_raw_string()
+ dir_ = attr.evolve(
+ dir_, raw_manifest=git_object_header("tree", len(raw_string)) + raw_string
+ )
+
check_id(dir_)
return dir_
diff --git a/swh/loader/git/tests/test_converters.py b/swh/loader/git/tests/test_converters.py
--- a/swh/loader/git/tests/test_converters.py
+++ b/swh/loader/git/tests/test_converters.py
@@ -171,16 +171,54 @@
_callable(Something())
def test_corrupt_tree(self):
- # has a signature
- sha1 = b"f0695c2e2fa7ce9d574023c3413761a473e500ca"
- tree = copy.deepcopy(self.repo[sha1])
+ sha1 = b"a9b41fc6347d778f16c4380b598d8083e9b4c1fb"
+ target = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"
+ tree = dulwich.objects.Tree()
+ tree.add(b"file1", 0o644, target)
+ assert tree.sha().hexdigest() == sha1.decode()
converters.dulwich_tree_to_directory(tree)
- del tree._entries[next(iter(tree._entries))]
+ original_sha = tree.sha()
+
+ tree.add(b"file2", 0o644, target)
+ tree.sha() # reset tree._needs_serialization
+ tree._sha = original_sha # force the wrong hash
+ assert tree.sha().hexdigest() == sha1.decode()
with pytest.raises(converters.HashMismatch):
converters.dulwich_tree_to_directory(tree)
+ def test_weird_tree(self):
+ """Tests a tree with entries in the wrong order"""
+
+ raw_manifest = (
+ b"0644 file2\x00"
+ b"d\x1f\xb6\xe0\x8d\xdb.O\xd0\x96\xdc\xf1\x8e\x80\xb8\x94\xbf~%\xce"
+ b"0644 file1\x00"
+ b"d\x1f\xb6\xe0\x8d\xdb.O\xd0\x96\xdc\xf1\x8e\x80\xb8\x94\xbf~%\xce"
+ )
+
+ tree = dulwich.objects.Tree.from_raw_string(b"tree", raw_manifest)
+
+ assert converters.dulwich_tree_to_directory(tree) == Directory(
+ entries=(
+ # in alphabetical order, as it should be
+ DirectoryEntry(
+ name=b"file1",
+ type="file",
+ target=hash_to_bytes("641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"),
+ perms=0o644,
+ ),
+ DirectoryEntry(
+ name=b"file2",
+ type="file",
+ target=hash_to_bytes("641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"),
+ perms=0o644,
+ ),
+ ),
+ raw_manifest=b"tree 62\x00" + raw_manifest,
+ )
+
def test_tree_perms(self):
entries = [
(b"blob_100644", 0o100644, "file"),

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:46 AM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216273

Event Timeline