Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/dumb.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from __future__ import annotations | from __future__ import annotations | ||||
from collections import defaultdict | from collections import defaultdict | ||||
import logging | import logging | ||||
import stat | import stat | ||||
import struct | import struct | ||||
from tempfile import SpooledTemporaryFile | from tempfile import SpooledTemporaryFile | ||||
from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Set, cast | from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Set, cast | ||||
import urllib.parse | import urllib.parse | ||||
from dulwich.errors import NotGitRepository | from dulwich.errors import NotGitRepository | ||||
from dulwich.objects import S_IFGITLINK, Commit, ShaFile, Tree | from dulwich.objects import S_IFGITLINK, Commit, ShaFile, Tree | ||||
from dulwich.pack import Pack, PackData, PackIndex, load_pack_index_file | from dulwich.pack import Pack, PackData, PackIndex, load_pack_index_file | ||||
import requests | import requests | ||||
from swh.loader.git.utils import HexBytes | from swh.loader.git.utils import HexBytes | ||||
if TYPE_CHECKING: | if TYPE_CHECKING: | ||||
from .loader import RepoRepresentation | from .loader import BaseRepoRepresentation | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
HEADERS = {"User-Agent": "Software Heritage dumb Git loader"} | HEADERS = {"User-Agent": "Software Heritage dumb Git loader"} | ||||
def check_protocol(repo_url: str) -> bool: | def check_protocol(repo_url: str) -> bool: | ||||
Show All 31 Lines | class GitObjectsFetcher: | ||||
Fetches a set of git objects for a repository according to its archival | Fetches a set of git objects for a repository according to its archival | ||||
state by Software Heritage and provides iterators on them. | state by Software Heritage and provides iterators on them. | ||||
Args: | Args: | ||||
repo_url: Base URL of a git repository | repo_url: Base URL of a git repository | ||||
base_repo: State of repository archived by Software Heritage | base_repo: State of repository archived by Software Heritage | ||||
""" | """ | ||||
def __init__(self, repo_url: str, base_repo: RepoRepresentation): | def __init__(self, repo_url: str, base_repo: BaseRepoRepresentation): | ||||
self._session = requests.Session() | self._session = requests.Session() | ||||
self.repo_url = repo_url | self.repo_url = repo_url | ||||
self.base_repo = base_repo | self.base_repo = base_repo | ||||
self.objects: Dict[bytes, Set[bytes]] = defaultdict(set) | self.objects: Dict[bytes, Set[bytes]] = defaultdict(set) | ||||
self.refs = self._get_refs() | self.refs = self._get_refs() | ||||
self.head = self._get_head() if self.refs else {} | self.head = self._get_head() if self.refs else {} | ||||
self.packs = self._get_packs() | self.packs = self._get_packs() | ||||
▲ Show 20 Lines • Show All 124 Lines • Show Last 20 Lines |