diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py --- a/swh/loader/git/converters.py +++ b/swh/loader/git/converters.py @@ -120,6 +120,20 @@ ) dir_ = Directory(id=tree.sha().digest(), entries=tuple(entries),) + + if dir_.compute_hash() != dir_.id: + expected_id = dir_.id + actual_id = dir_.compute_hash() + logger.warning( + "Expected directory to have id %s, but got %s. Recording raw_manifest.", + hash_to_hex(expected_id), + hash_to_hex(actual_id), + ) + raw_string = tree.as_raw_string() + dir_ = attr.evolve( + dir_, raw_manifest=git_object_header("tree", len(raw_string)) + raw_string + ) + check_id(dir_) return dir_ diff --git a/swh/loader/git/tests/test_converters.py b/swh/loader/git/tests/test_converters.py --- a/swh/loader/git/tests/test_converters.py +++ b/swh/loader/git/tests/test_converters.py @@ -171,16 +171,54 @@ _callable(Something()) def test_corrupt_tree(self): - # has a signature - sha1 = b"f0695c2e2fa7ce9d574023c3413761a473e500ca" - tree = copy.deepcopy(self.repo[sha1]) + sha1 = b"a9b41fc6347d778f16c4380b598d8083e9b4c1fb" + target = b"641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce" + tree = dulwich.objects.Tree() + tree.add(b"file1", 0o644, target) + assert tree.sha().hexdigest() == sha1.decode() converters.dulwich_tree_to_directory(tree) - del tree._entries[next(iter(tree._entries))] + original_sha = tree.sha() + + tree.add(b"file2", 0o644, target) + tree.sha() # reset tree._needs_serialization + tree._sha = original_sha # force the wrong hash + assert tree.sha().hexdigest() == sha1.decode() with pytest.raises(converters.HashMismatch): converters.dulwich_tree_to_directory(tree) + def test_weird_tree(self): + """Tests a tree with entries in the wrong order""" + + raw_manifest = ( + b"0644 file2\x00" + b"d\x1f\xb6\xe0\x8d\xdb.O\xd0\x96\xdc\xf1\x8e\x80\xb8\x94\xbf~%\xce" + b"0644 file1\x00" + b"d\x1f\xb6\xe0\x8d\xdb.O\xd0\x96\xdc\xf1\x8e\x80\xb8\x94\xbf~%\xce" + ) + + tree = dulwich.objects.Tree.from_raw_string(b"tree", raw_manifest) + 
assert converters.dulwich_tree_to_directory(tree) == Directory( + entries=( + # in alphabetical order, as it should be + DirectoryEntry( + name=b"file1", + type="file", + target=hash_to_bytes("641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"), + perms=0o644, + ), + DirectoryEntry( + name=b"file2", + type="file", + target=hash_to_bytes("641fb6e08ddb2e4fd096dcf18e80b894bf7e25ce"), + perms=0o644, + ), + ), + raw_manifest=b"tree 62\x00" + raw_manifest, + ) + def test_tree_perms(self): entries = [ (b"blob_100644", 0o100644, "file"),