diff --git a/swh/loader/git/from_disk.py b/swh/loader/git/from_disk.py --- a/swh/loader/git/from_disk.py +++ b/swh/loader/git/from_disk.py @@ -241,8 +241,19 @@ else: branches[ref] = None + dangling_branches = {} for ref, target in self.repo.refs.get_symrefs().items(): - branches[ref] = SnapshotBranch(target=target, target_type=TargetType.ALIAS,) + branches[ref] = SnapshotBranch(target=target, target_type=TargetType.ALIAS) + if target not in branches: + # This handles the case where the pointer is "dangling". + # There's a chance that a further symbolic reference will + # override this default value, which is totally fine. + dangling_branches[target] = ref + branches[target] = None + + utils.warn_dangling_branches( + branches, dangling_branches, self.log, self.origin_url + ) self.snapshot = Snapshot(branches=branches) return self.snapshot diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py --- a/swh/loader/git/loader.py +++ b/swh/loader/git/loader.py @@ -32,7 +32,7 @@ from swh.loader.core.loader import DVCSLoader from swh.storage.algos.snapshot import snapshot_get_all_branches -from . import converters +from . import converters, utils def ignore_branch_name(branch_name: bytes) -> bool: @@ -456,11 +456,18 @@ # and we can get it from the base snapshot later. unfetched_refs[ref_name] = target + dangling_branches = {} # Handle symbolic references as alias branches for ref_name, target in self.symbolic_refs.items(): branches[ref_name] = SnapshotBranch( target_type=TargetType.ALIAS, target=target, ) + if target not in branches and target not in unfetched_refs: + # This handles the case where the pointer is "dangling". + # There's a chance that a further symbolic reference + # override this default value, which is totally fine. + dangling_branches[target] = ref_name + branches[target] = None if unfetched_refs: # Handle inference of object types from the contents of the @@ -492,6 +499,10 @@ ) ) + utils.warn_dangling_branches( + branches, dangling_branches, self.log, self.origin_url + ) + self.snapshot = Snapshot(branches=branches) return self.snapshot diff --git a/swh/loader/git/tests/test_from_disk.py b/swh/loader/git/tests/test_from_disk.py --- a/swh/loader/git/tests/test_from_disk.py +++ b/swh/loader/git/tests/test_from_disk.py @@ -314,6 +314,33 @@ "target_type": "revision", } + def test_load_dangling_symref(self): + with open(os.path.join(self.destination_path, ".git/HEAD"), "wb") as f: + f.write(b"ref: refs/heads/dangling-branch\n") + + res = self.load() + self.assertEqual(res["status"], "eventful", res) + + self.assertContentsContain(CONTENT1) + self.assertCountDirectories(7) + self.assertCountReleases(0) # FIXME: should be 2 after T2059 + self.assertCountRevisions(7) + self.assertCountSnapshots(1) + + visit = self.storage.origin_visit_get_latest(self.repo_url) + snapshot_id = visit["snapshot"] + assert snapshot_id is not None + assert visit["status"] == "full" + + snapshot = self.storage.snapshot_get(snapshot_id) + branches = snapshot["branches"] + + assert branches[b"HEAD"] == { + "target": b"refs/heads/dangling-branch", + "target_type": "alias", + } + assert branches[b"refs/heads/dangling-branch"] is None + class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderFromDiskTests): """Tests for GitLoaderFromArchive. Imports the common ones diff --git a/swh/loader/git/utils.py b/swh/loader/git/utils.py --- a/swh/loader/git/utils.py +++ b/swh/loader/git/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,11 +6,14 @@ """Utilities helper functions""" import datetime +import logging import os import shutil import tempfile +from typing import Dict, Optional from swh.core import tarball +from swh.model.model import SnapshotBranch def init_git_repo_from_archive(project_name, archive_path, root_temp_dir="/tmp"): @@ -73,3 +76,27 @@ if not timestamp: return None datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc) + + +def warn_dangling_branches( + branches: Dict[bytes, Optional[SnapshotBranch]], + dangling_branches: Dict[bytes, bytes], + logger: logging.Logger, + origin_url: str, +) -> None: + dangling_branches = { + target: ref for target, ref in dangling_branches.items() if not branches[target] + } + + if dangling_branches: + descr = [f"{ref!r}->{target!r}" for target, ref in dangling_branches.items()] + + logger.warning( + "Dangling symbolic references: %s", + ", ".join(descr), + extra={ + "swh_type": "swh_loader_git_dangling_symrefs", + "swh_refs": descr, + "origin_url": origin_url, + }, + )