Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9311883
D799.id2547.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
D799.id2547.diff
View Options
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -257,6 +257,44 @@
class GitLoaderFromArchive(GitLoader):
"""Load a git repository from an archive.
+ This loader ingests a git repository compressed into an archive.
+ The supported archive formats are ``.zip``, ``.tar.gz`` and ``.tgz``.
+
+ For an input archive named ``my-git-repo.zip``, the following layout is
+ expected inside it::
+
+ my-git-repo/
+ ├── .git
+ │ ├── branches
+ │ ├── COMMIT_EDITMSG
+ │ ├── config
+ │ ├── description
+ │ ├── HEAD
+ ...
+
+ Nevertheless, the loader is also able to ingest archives with the
+ following layouts::
+
+ .
+ ├── .git
+ │ ├── branches
+ │ ├── COMMIT_EDITMSG
+ │ ├── config
+ │ ├── description
+ │ ├── HEAD
+ ...
+
+ or::
+
+ other-repo-name/
+ ├── .git
+ │ ├── branches
+ │ ├── COMMIT_EDITMSG
+ │ ├── config
+ │ ├── description
+ │ ├── HEAD
+ ...
+
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -266,7 +304,12 @@
"""Compute the project name from the archive's path.
"""
- return os.path.basename(os.path.dirname(archive_path))
+ archive_name = os.path.basename(archive_path)
+ for ext in ('.zip', '.tar.gz', '.tgz'):
+ if archive_name.lower().endswith(ext):
+ archive_name = archive_name[:-len(ext)]
+ break
+ return archive_name
def prepare_origin_visit(self, origin_url, archive_path, visit_date):
self._prepare_origin_visit(origin_url, visit_date)
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -4,8 +4,6 @@
# See top-level LICENSE file for more information
import os.path
-import zipfile
-import tempfile
import subprocess
from swh.loader.git.loader import GitLoader, GitLoaderFromArchive
@@ -19,13 +17,16 @@
# We don't want the project name to be 'resources'.
return 'testrepo'
+ def parse_config_file(self, *args, **kwargs):
+ return TEST_LOADER_CONFIG
+
CONTENT1 = {
'33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d', # README v1
'349c4ff7d21f1ec0eda26f3d9284c293e3425417', # README v2
'799c11e348d39f1704022b8354502e2f81f3c037', # file1.txt
'4bdb40dfd6ec75cb730e678b5d7786e30170c5fb', # file2.txt
- }
+}
SNAPSHOT_ID = 'bdf3b06d6017e0d9ad6447a73da6ff1ae9efb8f0'
@@ -89,7 +90,7 @@
'9ca0c7d6ffa3f9f0de59fd7912e08f11308a1338',
'bd746cd1913721b269b395a56a97baf6755151c2':
'e1d0d894835f91a0f887a4bc8b16f81feefdfbd5',
- }
+}
class BaseGitLoaderTest(BaseLoaderTest):
@@ -113,7 +114,7 @@
"""
def setUp(self):
- super().setUp('testrepo.tgz', True)
+ super().setUp('testrepo.tgz', uncompress_archive=True)
self.loader = GitLoaderTest()
self.storage = self.loader.storage
@@ -124,12 +125,7 @@
directory=self.destination_path)
-class GitLoaderFromArchiveTest(GitLoaderFromArchive):
- def parse_config_file(self, *args, **kwargs):
- return TEST_LOADER_CONFIG
-
-
-class BaseZipGitLoaderTest(BaseGitLoaderTest):
+class BaseGitLoaderFromArchiveTest(BaseGitLoaderTest):
"""Mixin base loader test to prepare the git
repository to uncompress, load and test the results.
@@ -137,32 +133,10 @@
"""
def setUp(self):
- super().setUp('testrepo.tgz', True)
- self._setup_zip()
- self.loader = GitLoaderFromArchiveTest()
+ super().setUp('testrepo.tgz', uncompress_archive=False)
+ self.loader = GitLoaderFromArchive()
self.storage = self.loader.storage
- def _setup_zip(self):
- self._zip_file = tempfile.NamedTemporaryFile('ab', suffix='.zip')
- dest_dir = os.path.normpath(self.destination_path) + '/'
- with zipfile.ZipFile(self._zip_file, 'a') as zip_writer:
- for root, dirs, files in os.walk(dest_dir):
- assert root.startswith(dest_dir)
- relative_root = os.path.join(
- 'testrepo',
- root[len(dest_dir):])
- for file_ in files:
- zip_writer.write(
- filename=os.path.join(root, file_),
- arcname=os.path.join(relative_root, file_))
- self.destination_path = self._zip_file.name
- self.tmp_root_path = None
- self.repo_url = 'file://' + self.destination_path
-
- def tearDown(self):
- self._zip_file.close()
- super().tearDown()
-
def load(self):
return self.loader.load(
origin_url=self.repo_url,
@@ -281,7 +255,7 @@
self.assertEqual(self.loader.visit_status(), 'full')
-class ZipGitLoaderTest(BaseZipGitLoaderTest, GitLoaderTests):
+class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderTests):
"""Tests for GitLoaderFromArchive. Imports the common ones
from GitLoaderTests."""
pass
diff --git a/swh/loader/git/utils.py b/swh/loader/git/utils.py
--- a/swh/loader/git/utils.py
+++ b/swh/loader/git/utils.py
@@ -10,7 +10,7 @@
import shutil
import tempfile
-from subprocess import call
+from swh.core import tarball
def init_git_repo_from_archive(project_name, archive_path,
@@ -41,11 +41,18 @@
try:
# create the repository that will be loaded with the dump
- r = call(['unzip', '-q', '-o', archive_path, '-d', temp_dir])
- if r != 0:
- raise ValueError('Failed to uncompress archive %s' % archive_path)
-
+ tarball.uncompress(archive_path, temp_dir)
repo_path = os.path.join(temp_dir, project_name)
+ # tarball content may not be as expected (e.g. no top level directory
+ # or a top level directory with a name different from project_name),
+ # so try to make it loadable anyway
+ if not os.path.exists(repo_path):
+ os.mkdir(repo_path)
+ for root, dirs, files in os.walk(temp_dir):
+ if '.git' in dirs:
+ shutil.copytree(os.path.join(root, '.git'),
+ os.path.join(repo_path, '.git'))
+ break
return temp_dir, repo_path
except Exception as e:
shutil.rmtree(temp_dir)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 10:35 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221456
Attached To
D799: loader.git: Improve GitLoaderFromArchive
Event Timeline
Log In to Comment