Page MenuHomeSoftware Heritage

D799.id2547.diff
No OneTemporary

D799.id2547.diff

diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -257,6 +257,44 @@
class GitLoaderFromArchive(GitLoader):
"""Load a git repository from an archive.
+ This loader ingests a git repository compressed into an archive.
+ The supported archive formats are ``.zip``, ``.tar.gz`` and ``.tgz``.
+
+ From an input archive named ``my-git-repo.zip``, the following layout is
+ expected in it::
+
+ my-git-repo/
+ ├── .git
+ │ ├── branches
+ │ ├── COMMIT_EDITMSG
+ │ ├── config
+ │ ├── description
+ │ ├── HEAD
+ ...
+
+ Nevertheless, the loader is able to ingest archives with the following
+ layouts too::
+
+ .
+ ├── .git
+ │ ├── branches
+ │ ├── COMMIT_EDITMSG
+ │ ├── config
+ │ ├── description
+ │ ├── HEAD
+ ...
+
+ or::
+
+ other-repo-name/
+ ├── .git
+ │ ├── branches
+ │ ├── COMMIT_EDITMSG
+ │ ├── config
+ │ ├── description
+ │ ├── HEAD
+ ...
+
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -266,7 +304,12 @@
"""Compute the project name from the archive's path.
"""
- return os.path.basename(os.path.dirname(archive_path))
+ archive_name = os.path.basename(archive_path)
+ for ext in ('.zip', '.tar.gz', '.tgz'):
+ if archive_name.lower().endswith(ext):
+ archive_name = archive_name[:-len(ext)]
+ break
+ return archive_name
def prepare_origin_visit(self, origin_url, archive_path, visit_date):
self._prepare_origin_visit(origin_url, visit_date)
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -4,8 +4,6 @@
# See top-level LICENSE file for more information
import os.path
-import zipfile
-import tempfile
import subprocess
from swh.loader.git.loader import GitLoader, GitLoaderFromArchive
@@ -19,13 +17,16 @@
# We don't want the project name to be 'resources'.
return 'testrepo'
+ def parse_config_file(self, *args, **kwargs):
+ return TEST_LOADER_CONFIG
+
CONTENT1 = {
'33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d', # README v1
'349c4ff7d21f1ec0eda26f3d9284c293e3425417', # README v2
'799c11e348d39f1704022b8354502e2f81f3c037', # file1.txt
'4bdb40dfd6ec75cb730e678b5d7786e30170c5fb', # file2.txt
- }
+}
SNAPSHOT_ID = 'bdf3b06d6017e0d9ad6447a73da6ff1ae9efb8f0'
@@ -89,7 +90,7 @@
'9ca0c7d6ffa3f9f0de59fd7912e08f11308a1338',
'bd746cd1913721b269b395a56a97baf6755151c2':
'e1d0d894835f91a0f887a4bc8b16f81feefdfbd5',
- }
+}
class BaseGitLoaderTest(BaseLoaderTest):
@@ -113,7 +114,7 @@
"""
def setUp(self):
- super().setUp('testrepo.tgz', True)
+ super().setUp('testrepo.tgz', uncompress_archive=True)
self.loader = GitLoaderTest()
self.storage = self.loader.storage
@@ -124,12 +125,7 @@
directory=self.destination_path)
-class GitLoaderFromArchiveTest(GitLoaderFromArchive):
- def parse_config_file(self, *args, **kwargs):
- return TEST_LOADER_CONFIG
-
-
-class BaseZipGitLoaderTest(BaseGitLoaderTest):
+class BaseGitLoaderFromArchiveTest(BaseGitLoaderTest):
"""Mixin base loader test to prepare the git
repository to uncompress, load and test the results.
@@ -137,32 +133,10 @@
"""
def setUp(self):
- super().setUp('testrepo.tgz', True)
- self._setup_zip()
- self.loader = GitLoaderFromArchiveTest()
+ super().setUp('testrepo.tgz', uncompress_archive=False)
+ self.loader = GitLoaderFromArchive()
self.storage = self.loader.storage
- def _setup_zip(self):
- self._zip_file = tempfile.NamedTemporaryFile('ab', suffix='.zip')
- dest_dir = os.path.normpath(self.destination_path) + '/'
- with zipfile.ZipFile(self._zip_file, 'a') as zip_writer:
- for root, dirs, files in os.walk(dest_dir):
- assert root.startswith(dest_dir)
- relative_root = os.path.join(
- 'testrepo',
- root[len(dest_dir):])
- for file_ in files:
- zip_writer.write(
- filename=os.path.join(root, file_),
- arcname=os.path.join(relative_root, file_))
- self.destination_path = self._zip_file.name
- self.tmp_root_path = None
- self.repo_url = 'file://' + self.destination_path
-
- def tearDown(self):
- self._zip_file.close()
- super().tearDown()
-
def load(self):
return self.loader.load(
origin_url=self.repo_url,
@@ -281,7 +255,7 @@
self.assertEqual(self.loader.visit_status(), 'full')
-class ZipGitLoaderTest(BaseZipGitLoaderTest, GitLoaderTests):
+class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderTests):
"""Tests for GitLoaderFromArchive. Imports the common ones
from GitLoaderTests."""
pass
diff --git a/swh/loader/git/utils.py b/swh/loader/git/utils.py
--- a/swh/loader/git/utils.py
+++ b/swh/loader/git/utils.py
@@ -10,7 +10,7 @@
import shutil
import tempfile
-from subprocess import call
+from swh.core import tarball
def init_git_repo_from_archive(project_name, archive_path,
@@ -41,11 +41,18 @@
try:
# create the repository that will be loaded with the dump
- r = call(['unzip', '-q', '-o', archive_path, '-d', temp_dir])
- if r != 0:
- raise ValueError('Failed to uncompress archive %s' % archive_path)
-
+ tarball.uncompress(archive_path, temp_dir)
repo_path = os.path.join(temp_dir, project_name)
+ # tarball content may not be as expected (e.g. no top level directory
+ # or a top level directory with a name different from project_name),
+ # so try to make it loadable anyway
+ if not os.path.exists(repo_path):
+ os.mkdir(repo_path)
+ for root, dirs, files in os.walk(temp_dir):
+ if '.git' in dirs:
+ shutil.copytree(os.path.join(root, '.git'),
+ os.path.join(repo_path, '.git'))
+ break
return temp_dir, repo_path
except Exception as e:
shutil.rmtree(temp_dir)

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:35 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221456

Event Timeline