diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,5 @@ include requirements*.txt include version.txt recursive-include swh/loader/git/tests/data * +recursive-include swh/loader/git/tests/data *.bundle *.tgz recursive-include swh py.typed diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.7 swh.loader.core >= 0.5.2 -swh.model >= 0.3.0 +swh.model >= 0.4.0 swh.scheduler >= 0.0.39 -swh.storage >= 0.7.0 +swh.storage >= 0.10.0 diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py --- a/swh/loader/git/converters.py +++ b/swh/loader/git/converters.py @@ -5,7 +5,7 @@ """Convert dulwich objects to dictionaries suitable for swh.storage""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash from swh.model.model import ( @@ -105,25 +105,21 @@ git_metadata = [] if commit.encoding is not None: - git_metadata.append(["encoding", commit.encoding]) + git_metadata.append((b"encoding", commit.encoding)) if commit.mergetag: for mergetag in commit.mergetag: raw_string = mergetag.as_raw_string() assert raw_string.endswith(b"\n") - git_metadata.append(["mergetag", raw_string[:-1]]) + git_metadata.append((b"mergetag", raw_string[:-1])) if commit.extra: - git_metadata.extend([k.decode("utf-8"), v] for k, v in commit.extra) + git_metadata.extend((k, v) for k, v in commit.extra) if commit.gpgsig: - git_metadata.append(["gpgsig", commit.gpgsig]) + git_metadata.append((b"gpgsig", commit.gpgsig)) - if git_metadata: - metadata: Optional[Dict[str, Any]] = { - "extra_headers": git_metadata, - } - else: - metadata = None + extra_headers: Tuple[Tuple[bytes, bytes], ...] + extra_headers = tuple(git_metadata) return Revision( id=commit.sha().digest(), @@ -138,7 +134,8 @@ type=RevisionType.GIT, directory=bytes.fromhex(commit.tree.decode()), message=commit.message, - metadata=metadata, + metadata=None, + extra_headers=extra_headers, synthetic=False, parents=tuple(bytes.fromhex(p.decode()) for p in commit.parents), ) diff --git a/swh/loader/git/tests/data/git-repos/example-submodule.bundle b/swh/loader/git/tests/data/git-repos/example-submodule.bundle new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ 1594138133 +0200\n" + b"\n" + b"v0.0.1\n" + b"-----BEGIN PGP SIGNATURE-----\n" + b"\n" + b"iQJLBAABCAA1FiEEAOWDevQbOk/9ITMF6ImSleOlnUcFAl8EnhkXHGRhdmlkLmRv\n" + b"dWFyZEBzZGZhMy5vcmcACgkQ6ImSleOlnUcdzg//ZW9y2xU5JFQuUsBe/LfKrs+m\n" + b"0ohVInPKXwAfpB3+gn/XtTSLe+Nnr8+QEZyVRCUz2gpGZ2tNqRjhYLIX4x5KKlaV\n" + b"rfl/6Cy7zibsxxuzA1h7HylCs3IPsueQpznVHUwD9jQ5baGJSc2Lt1LufXTueHZJ\n" + b"Oc0oLiP5xCZcPqeX8R/4zUUImJZ1QrPeKmQ/3F+Iq62iWp7nWDp8PtwpykSiYlNf\n" + b"KrJM8omGvrlrWLtfPNUaQFClXwnwK1/HyNY2kYan6K5NtsIl2UX0LZ42GkRjJIrb\n" + b"q4TFIZWZ6xndtEhHEX6B8Q5TZV6sqPgNnfGpbhj8BDoZgjD0Y43fzfDiZ0Bl2tph\n" + b"tXaLg3SX/UUjFVzC1zkoQ2MR7+j8NVKauAsBINpKF4pMGsrsVRk8764pgO49iQ+S\n" + b"8JVCVV76dNNm1gd7BbhFAdIAiegBtsEF69niJBoHKYLlrT8E8hDkF/gk4IkimPqf\n" + b"UHtw/fPhVW3B4G2skd013NJGcnRj5oKtaM99d2Roxc3vhSRiTsoaM8BM9NDvLmJg\n" + b"35rWEOnet39iJIMCHk3AYaJl8QmUhllDdr6vygaBVeVEf27m2c3NzONmIKpWqa2J\n" + b"kTpF4cmzHYro34G7WuJ1bYvmLb6qWNQt9wd8RW+J1kVm5I8dkjPzLUougBpOd0YL\n" + b"Bl5UTQILbV4Tv8ZlmJM=\n" + b"=s1lv\n" + b"-----END PGP SIGNATURE-----" +) class SWHObjectType: @@ -75,23 +121,17 @@ def setUpClass(cls): super().setUpClass() cls.repo_path = tempfile.mkdtemp() - cls.repo = dulwich.repo.Repo.init_bare(cls.repo_path) - fast_export = os.path.join( - TEST_DATA, "git-repos", "example-submodule.fast-export.xz" - ) - - xz = subprocess.Popen( - ["xzcat"], stdin=open(fast_export, "rb"), stdout=subprocess.PIPE, - ) + bundle = os.path.join(TEST_DATA, "git-repos", "example-submodule.bundle") git = subprocess.Popen( - ["git", "fast-import", "--quiet"], stdin=xz.stdout, cwd=cls.repo_path, + ["git", "clone", "--quiet", "--bare", "--mirror", bundle, cls.repo_path], + cwd=TEST_DATA, ) # flush stdout of xz - xz.stdout.close() git.communicate() + cls.repo = dulwich.repo.Repo(cls.repo_path) @classmethod def tearDownClass(cls): @@ -142,7 +182,6 @@ sha1 = b"9768d0b576dbaaecd80abedad6dfd0d72f1476da" revision = converters.dulwich_commit_to_revision(self.repo[sha1]) - expected_revision = Revision( id=hash_to_bytes("9768d0b576dbaaecd80abedad6dfd0d72f1476da"), directory=b"\xf0i\\./\xa7\xce\x9dW@#\xc3A7a\xa4s\xe5\x00\xca", @@ -164,6 +203,7 @@ ), message=b"add submodule dependency\n", metadata=None, + extra_headers=(), date=TimestampWithTimezone( timestamp=Timestamp(seconds=1443083765, microseconds=0,), negative_utc=False, @@ -175,6 +215,83 @@ self.assertEqual(revision, expected_revision) + def test_commit_to_revision_with_extra_headers(self): + sha1 = b"322f5bc915e50fc25e85226b5a182bded0e98e4b" + + revision = converters.dulwich_commit_to_revision(self.repo[sha1]) + expected_revision = Revision( + id=hash_to_bytes(sha1.decode()), + directory=bytes.fromhex("f8ec06e4ed7b9fff4918a0241a48023143f30000"), + type=RevisionType.GIT, + committer=Person( + name=b"David Douard", + fullname=b"David Douard ", + email=b"david.douard@sdfa3.org", + ), + author=Person( + name=b"David Douard", + fullname=b"David Douard ", + email=b"david.douard@sdfa3.org", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1594137902, microseconds=0,), + negative_utc=False, + offset=120, + ), + message=b"Am\xe9lioration du fichier READM\xa4\n", + metadata=None, + extra_headers=((b"encoding", b"ISO-8859-15"), (b"gpgsig", GPGSIG)), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1594136900, microseconds=0,), + negative_utc=False, + offset=120, + ), + parents=(bytes.fromhex("c730509025c6e81947102b2d77bc4dc1cade9489"),), + synthetic=False, + ) + + assert revision == expected_revision + + def test_commit_to_revision_with_extra_headers_mergetag(self): + sha1 = b"3ab3da4bf0f81407be16969df09cd1c8af9ac703" + + revision = converters.dulwich_commit_to_revision(self.repo[sha1]) + expected_revision = Revision( + id=hash_to_bytes(sha1.decode()), + directory=bytes.fromhex("faa4b64a841ca3e3f07d6501caebda2e3e8e544e"), + type=RevisionType.GIT, + committer=Person( + name=b"David Douard", + fullname=b"David Douard ", + email=b"david.douard@sdfa3.org", + ), + author=Person( + name=b"David Douard", + fullname=b"David Douard ", + email=b"david.douard@sdfa3.org", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1594138183, microseconds=0,), + negative_utc=False, + offset=120, + ), + message=b"Merge tag 'v0.0.1' into readme\n\nv0.0.1\n", + metadata=None, + extra_headers=((b"encoding", b"ISO-8859-15"), (b"mergetag", MERGETAG)), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1594138183, microseconds=0,), + negative_utc=False, + offset=120, + ), + parents=( + bytes.fromhex("322f5bc915e50fc25e85226b5a182bded0e98e4b"), + bytes.fromhex("9768d0b576dbaaecd80abedad6dfd0d72f1476da"), + ), + synthetic=False, + ) + + assert revision == expected_revision + def test_author_line_to_author(self): # edge case out of the way with self.assertRaises(TypeError):