Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/utils.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Utilities helper functions""" | """Utilities helper functions""" | ||||
import datetime | import datetime | ||||
import os | import os | ||||
import shutil | import shutil | ||||
import tempfile | import tempfile | ||||
from subprocess import call | from swh.core import tarball | ||||
def init_git_repo_from_archive(project_name, archive_path, | def init_git_repo_from_archive(project_name, archive_path, | ||||
root_temp_dir='/tmp'): | root_temp_dir='/tmp'): | ||||
"""Given a path to an archive containing a git repository. | """Given a path to an archive containing a git repository. | ||||
Uncompress that archive to a temporary location and returns the path. | Uncompress that archive to a temporary location and returns the path. | ||||
Show All 14 Lines | Raises | ||||
ValueError in case of failure to run the command to uncompress | ValueError in case of failure to run the command to uncompress | ||||
""" | """ | ||||
temp_dir = tempfile.mkdtemp( | temp_dir = tempfile.mkdtemp( | ||||
suffix='.swh.loader.git', prefix='tmp.', dir=root_temp_dir) | suffix='.swh.loader.git', prefix='tmp.', dir=root_temp_dir) | ||||
try: | try: | ||||
# create the repository that will be loaded with the dump | # create the repository that will be loaded with the dump | ||||
r = call(['unzip', '-q', '-o', archive_path, '-d', temp_dir]) | tarball.uncompress(archive_path, temp_dir) | ||||
if r != 0: | |||||
raise ValueError('Failed to uncompress archive %s' % archive_path) | |||||
repo_path = os.path.join(temp_dir, project_name) | repo_path = os.path.join(temp_dir, project_name) | ||||
# tarball content may not be as expected (e.g. no top level directory | |||||
# or a top level directory with a name different from project_name), | |||||
ardumont: Is that a production behavior or a test one? | |||||
Done Inline Actions The purpose here is to handle tarballs with an unexpected layout. Ideally, when encountering a tarball named testrepo.tgz, we expect the following layout in it: testrepo/ ├── .git │ ├── branches │ ├── COMMIT_EDITMSG │ ├── config │ ├── description │ ├── HEAD ... But sometimes we can have a layout like this: . ├── .git │ ├── branches │ ├── COMMIT_EDITMSG │ ├── config │ ├── description │ ├── HEAD ... or like this: other-repo-name/ ├── .git │ ├── branches │ ├── COMMIT_EDITMSG │ ├── config │ ├── description │ ├── HEAD ... The code below handles those cases. I can also remove it and indicate the expected layout in the docstring. anlambert: The purpose here is to handle tarballs with an unexpected layout.
Ideally, when encountering a… | |||||
Not Done Inline Actions
Ah yes, I think it was for the gitorious/googlecode git archives ingestion...
Or keep your code adaptation and move your comment and your 3 samples into the docstring ;) ardumont: > Ideally, when encountering a tarball named testrepo.tgz, we expect the following layout in it… | |||||
# so try to make it loadable anyway | |||||
if not os.path.exists(repo_path): | |||||
os.mkdir(repo_path) | |||||
for root, dirs, files in os.walk(temp_dir): | |||||
if '.git' in dirs: | |||||
shutil.copytree(os.path.join(root, '.git'), | |||||
os.path.join(repo_path, '.git')) | |||||
break | |||||
return temp_dir, repo_path | return temp_dir, repo_path | ||||
except Exception as e: | except Exception as e: | ||||
shutil.rmtree(temp_dir) | shutil.rmtree(temp_dir) | ||||
raise e | raise e | ||||
def check_date_time(timestamp): | def check_date_time(timestamp): | ||||
"""Check date time for overflow errors. | """Check date time for overflow errors. | ||||
Show All 12 Lines |
Is that a production behavior or a test one?