diff --git a/swh/loader/git/from_disk.py b/swh/loader/git/from_disk.py --- a/swh/loader/git/from_disk.py +++ b/swh/loader/git/from_disk.py @@ -17,6 +17,7 @@ # dulwich >= 0.20 from dulwich.objects import EmptyFileException +import dulwich.objects import dulwich.repo from swh.loader.core.loader import DVCSLoader @@ -28,6 +29,60 @@ from . import converters, utils +def _check_tag(tag): + """Copy-paste of dulwich.objects.Tag.check, minus the tagger and time checks, + which are too strict and error on old tags.""" + # Copyright (C) 2007 James Westby + # Copyright (C) 2008-2013 Jelmer Vernooij + # + # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU + # General Public License as public by the Free Software Foundation; version 2.0 + # or (at your option) any later version. You can redistribute it and/or + # modify it under the terms of either of these two licenses. + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # + # You should have received a copy of the licenses; if not, see + # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License + # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache + # License, Version 2.0. 
+ dulwich.objects.ShaFile.check(tag) + tag._check_has_member("_object_sha", "missing object sha") + tag._check_has_member("_object_class", "missing object type") + tag._check_has_member("_name", "missing tag name") + + if not tag._name: + raise ObjectFormatException("empty tag name") + + dulwich.objects.check_hexsha(tag._object_sha, "invalid object sha") + + if tag._tag_time is not None: + dulwich.objects.check_time(tag._tag_time) + + from dulwich.objects import ( + _OBJECT_HEADER, + _TAG_HEADER, + _TAGGER_HEADER, + _TYPE_HEADER, + ) + + last = None + for field, _ in dulwich.objects._parse_message(tag._chunked_text): + if field == _OBJECT_HEADER and last is not None: + raise ObjectFormatException("unexpected object") + elif field == _TYPE_HEADER and last != _OBJECT_HEADER: + raise ObjectFormatException("unexpected type") + elif field == _TAG_HEADER and last != _TYPE_HEADER: + raise ObjectFormatException("unexpected tag name") + elif field == _TAGGER_HEADER and last != _TAG_HEADER: + raise ObjectFormatException("unexpected tagger") + last = field + + class GitLoaderFromDisk(DVCSLoader): """Load a git repository from a directory. @@ -81,20 +136,23 @@ obj (object): Dulwich object read from the repository. 
""" - obj.check() - from dulwich.objects import Commit, Tag + if isinstance(obj, dulwich.objects.Tag): + _check_tag(obj) + else: + obj.check() try: # For additional checks on dulwich objects with date # for now, only checks on *time - if isinstance(obj, Commit): + if isinstance(obj, dulwich.objects.Commit): commit_time = obj._commit_time utils.check_date_time(commit_time) author_time = obj._author_time utils.check_date_time(author_time) - elif isinstance(obj, Tag): + elif isinstance(obj, dulwich.objects.Tag): tag_time = obj._tag_time - utils.check_date_time(tag_time) + if tag_time: + utils.check_date_time(tag_time) except Exception as e: raise ObjectFormatException(e) diff --git a/swh/loader/git/tests/test_from_disk.py b/swh/loader/git/tests/test_from_disk.py --- a/swh/loader/git/tests/test_from_disk.py +++ b/swh/loader/git/tests/test_from_disk.py @@ -8,6 +8,8 @@ import os.path from unittest import TestCase +import dulwich.objects +import dulwich.porcelain import dulwich.repo import pytest @@ -18,8 +20,8 @@ get_stats, prepare_repository_from_archive, ) -from swh.model.hashutil import hash_to_bytes -from swh.model.model import Snapshot, SnapshotBranch, TargetType +from swh.model.hashutil import bytehex_to_hash, hash_to_bytes +from swh.model.model import ObjectType, Release, Snapshot, SnapshotBranch, TargetType from swh.storage.algos.snapshot import snapshot_get_all_branches SNAPSHOT1 = Snapshot( @@ -426,6 +428,78 @@ assert results.next_page_token is None assert results.results == [] + def test_load_tag(self): + with open(os.path.join(self.destination_path, "hello.py"), "a") as fd: + fd.write("print('Hello world')\n") + + self.repo.stage([b"hello.py"]) + new_revision = self.repo.do_commit(b"Hello world\n") + + dulwich.porcelain.tag_create( + self.repo, + b"v1.0.0", + message=b"First release!", + annotated=True, + objectish=new_revision, + ) + + res = self.loader.load() + assert res == {"status": "eventful"} + + branches = 
self.loader.storage.snapshot_get_branches(self.loader.snapshot.id) + + print(list(branches["branches"])) + branch = branches["branches"][b"refs/tags/v1.0.0"] + assert branch.target_type == TargetType.RELEASE + + release = self.loader.storage.release_get([branch.target])[0] + assert release.date is not None + assert release.author is not None + assert release == Release( + name=b"v1.0.0", + message=b"First release!\n", + target_type=ObjectType.REVISION, + target=bytehex_to_hash(new_revision), + author=release.author, + date=release.date, + synthetic=False, + ) + + def test_load_tag_minimal(self): + with open(os.path.join(self.destination_path, "hello.py"), "a") as fd: + fd.write("print('Hello world')\n") + + self.repo.stage([b"hello.py"]) + new_revision = self.repo.do_commit(b"Hello world\n") + + # dulwich.porcelain.tag_create doesn't allow creating tags without + # a tagger or a date, so we have to create it "manually" + tag = dulwich.objects.Tag() + tag.message = b"First release!\n" + tag.name = b"v1.0.0" + tag.object = (dulwich.objects.Commit, new_revision) + self.repo.object_store.add_object(tag) + self.repo[b"refs/tags/v1.0.0"] = tag.id + + res = self.loader.load() + assert res == {"status": "eventful"} + + branches = self.loader.storage.snapshot_get_branches(self.loader.snapshot.id) + + print(list(branches["branches"])) + branch = branches["branches"][b"refs/tags/v1.0.0"] + assert branch.target_type == TargetType.RELEASE + + release = self.loader.storage.release_get([branch.target])[0] + assert release == Release( + id=bytehex_to_hash(tag.id), + name=b"v1.0.0", + message=b"First release!\n", + target_type=ObjectType.REVISION, + target=bytehex_to_hash(new_revision), + synthetic=False, + ) + class GitLoaderFromDiskTest(TestCase, FullGitLoaderTests): """Prepare a git directory repository to be loaded through a GitLoaderFromDisk.