Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124718
D6901.id25038.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
D6901.id25038.diff
View Options
diff --git a/swh/loader/git/dumb.py b/swh/loader/git/dumb.py
--- a/swh/loader/git/dumb.py
+++ b/swh/loader/git/dumb.py
@@ -11,12 +11,12 @@
import struct
from tempfile import SpooledTemporaryFile
from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Set, cast
+import urllib.parse
-from dulwich.client import HttpGitClient
from dulwich.errors import NotGitRepository
from dulwich.objects import S_IFGITLINK, Commit, ShaFile, Tree
from dulwich.pack import Pack, PackData, PackIndex, load_pack_index_file
-from urllib3.response import HTTPResponse
+import requests
from swh.loader.git.utils import HexBytes
@@ -26,18 +26,7 @@
logger = logging.getLogger(__name__)
-class DumbHttpGitClient(HttpGitClient):
- """Simple wrapper around dulwich.client.HTTPGitClient
- """
-
- def __init__(self, base_url: str):
- super().__init__(base_url)
- self.user_agent = "Software Heritage dumb Git loader"
-
- def get(self, url: str) -> HTTPResponse:
- logger.debug("Fetching %s", url)
- response, _ = self._http_request(url, headers={"User-Agent": self.user_agent})
- return response
+HEADERS = {"User-Agent": "Software Heritage dumb Git loader"}
def check_protocol(repo_url: str) -> bool:
@@ -52,12 +41,11 @@
"""
if not repo_url.startswith("http"):
return False
- http_client = DumbHttpGitClient(repo_url)
- url = http_client.get_url("info/refs?service=git-upload-pack")
- response = http_client.get(url)
- content_type = response.getheader("Content-Type")
+ url = urllib.parse.urljoin(repo_url, "info/refs?service=git-upload-pack/")
+ response = requests.get(url, headers=HEADERS)
+ content_type = response.headers.get("Content-Type")
return (
- response.status in (200, 304,)
+ response.status_code in (200, 304,)
# header is not mandatory in protocol specification
and (content_type is None or not content_type.startswith("application/x-git-"))
)
@@ -75,7 +63,8 @@
"""
def __init__(self, repo_url: str, base_repo: RepoRepresentation):
- self.http_client = DumbHttpGitClient(repo_url)
+ self._session = requests.Session()
+ self.repo_url = repo_url
self.base_repo = base_repo
self.objects: Dict[bytes, Set[bytes]] = defaultdict(set)
self.refs = self._get_refs()
@@ -124,10 +113,10 @@
return map(self._get_git_object, self.objects[object_type])
def _http_get(self, path: str) -> SpooledTemporaryFile:
- url = self.http_client.get_url(path)
- response = self.http_client.get(url)
+ url = urllib.parse.urljoin(self.repo_url, path)
+ response = self._session.get(url, headers=HEADERS)
buffer = SpooledTemporaryFile(max_size=100 * 1024 * 1024)
- buffer.write(response.data)
+ buffer.write(response.content)
buffer.flush()
buffer.seek(0)
return buffer
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 5:54 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224249
Attached To
D6901: Remove unnecessary use of dulwich.client.HttpGitClient
Event Timeline
Log In to Comment