diff --git a/swh/loader/svn/converters.py b/swh/loader/svn/converters.py index 72e2f35..d0cbe48 100644 --- a/swh/loader/svn/converters.py +++ b/swh/loader/svn/converters.py @@ -1,238 +1,242 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from email import utils from swh.core import hashutil from .utils import strdate_to_timestamp def svn_date_to_gitsvn_date(strdate): """Convert a string date to an swh one. Args: strdate: A string formatted for .utils.strdate_to_timestamp to do its jobs Returns: An swh date format with an integer timestamp. """ + ts = strdate_to_timestamp(strdate) return { - 'timestamp': int(strdate_to_timestamp(strdate)), + 'timestamp': { + 'seconds': ts['seconds'], + 'microseconds': 0, + }, 'offset': 0 } def svn_date_to_swh_date(strdate): """Convert a string date to an swh one. Args: strdate: A string formatted for .utils.strdate_to_timestamp to do its jobs Returns: An swh date format """ return { 'timestamp': strdate_to_timestamp(strdate), 'offset': 0 } def svn_author_to_swh_person(author): """Convert an svn author to an swh person. Default policy: No information is added. Args: author (string): the svn author (in bytes) Returns: a dictionary with keys: fullname: the author's associated fullname name: the author's associated name email: None (no email in svn) """ if not author: return {'fullname': b'', 'name': None, 'email': None} author = author.encode('utf-8') if b'<' in author and b'>' in author: name, email = utils.parseaddr(author.decode('utf-8')) return { 'fullname': author, 'name': name.encode('utf-8'), 'email': email.encode('utf-8') } return {'fullname': author, 'email': None, 'name': author} def svn_author_to_gitsvn_person(author, repo_uuid): """Convert an svn author to a person suitable for insertion. Default policy: If no email is found, the email is created using the author and the repo_uuid. Args: author (string): the svn author (in bytes) repo_uuid (bytes): the repository's uuid Returns: a dictionary with keys: fullname: the author's associated fullname name: the author's associated name email: None (no email in svn) """ if not author: author = '(no author)' author = author.encode('utf-8') if b'<' in author and b'>' in author: name, email = utils.parseaddr(author.decode('utf-8')) return { 'fullname': author, 'name': name.encode('utf-8'), 'email': email.encode('utf-8') } # we'll construct the author's fullname the same way git svn does # 'user ' email = b'@'.join([author, repo_uuid]) return { 'fullname': b''.join([author, b' ', b'<', email, b'>']), 'name': author, 'email': email, } def build_swh_revision(rev, commit, repo_uuid, dir_id, parents): """Given a svn revision, build a swh revision. This adds an ['metadata']['extra-headers'] entry with the repository's uuid and the svn revision. Args: - rev: the svn revision number - commit: the commit metadata - repo_uuid: The repository's uuid - dir_id: the tree's hash identifier - parents: the revision's parents identifier Returns: The swh revision dictionary. """ author = commit['author_name'] msg = commit['message'] date = commit['author_date'] metadata = { 'extra_headers': [ ['svn_repo_uuid', repo_uuid], ['svn_revision', str(rev).encode('utf-8')] ] } return { 'date': date, 'committer_date': date, 'type': 'svn', 'directory': dir_id, 'message': msg, 'author': author, 'committer': author, 'synthetic': True, 'metadata': metadata, 'parents': parents, } def build_gitsvn_swh_revision(rev, commit, dir_id, parents): """Given a svn revision, build a swh revision. Args: - rev: the svn revision number - commit: the commit metadata - dir_id: the tree's hash identifier - parents: the revision's parents identifier Returns: The swh revision dictionary. """ author = commit['author_name'] msg = commit['message'] date = commit['author_date'] return { 'date': date, 'committer_date': date, 'type': 'svn', 'directory': dir_id, 'message': msg, 'author': author, 'committer': author, 'synthetic': True, 'metadata': None, 'parents': parents, } def build_swh_occurrence(revision_id, origin_id, visit): """Build a swh occurrence from the revision id, origin id, and date. """ return {'branch': 'master', 'target': revision_id, 'target_type': 'revision', 'origin': origin_id, 'visit': visit} def loader_to_scheduler_revision(swh_revision): """To avoid serialization or scheduler storage problem, transform adequately the revision. FIXME: Should be more generically dealt with in swh-scheduler's side. The advantage to having it here is that we known what we store. """ if not swh_revision: return None metadata = swh_revision['metadata'] for entry in (e for e in metadata['extra_headers'] if isinstance(e[1], bytes)): entry[1] = entry[1].decode('utf-8') return { 'id': hashutil.hash_to_hex(swh_revision['id']), 'parents': [hashutil.hash_to_hex(parent) for parent in swh_revision['parents']], 'metadata': metadata } def scheduler_to_loader_revision(swh_revision): """If the known state (a revision) is already passed, it will be serializable ready but not loader ready. FIXME: Should be more generically dealt with in swh-scheduler's side. The advantage to having it here is that we known what we store. """ if not swh_revision: return None return { 'id': hashutil.hex_to_hash(swh_revision['id']), 'parents': [hashutil.hex_to_hash(parent) for parent in swh_revision['parents']], 'metadata': swh_revision['metadata'] } diff --git a/swh/loader/svn/tests/test_converters.py b/swh/loader/svn/tests/test_converters.py index d2e28b5..6a90bcc 100644 --- a/swh/loader/svn/tests/test_converters.py +++ b/swh/loader/svn/tests/test_converters.py @@ -1,321 +1,368 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from swh.loader.svn import converters class TestAuthorGitSvnConverters(unittest.TestCase): @istest def svn_author_to_gitsvn_person(self): """The author should have name, email and fullname filled. """ actual_person = converters.svn_author_to_gitsvn_person( 'tony ', repo_uuid=None) self.assertEquals(actual_person, { 'fullname': b'tony ', 'name': b'tony', 'email': b'ynot@dagobah', }) @istest def svn_author_to_gitsvn_person_no_email(self): """The author should see his/her email filled with author@. """ actual_person = converters.svn_author_to_gitsvn_person( 'tony', repo_uuid=b'some-uuid') self.assertEquals(actual_person, { 'fullname': b'tony ', 'name': b'tony', 'email': b'tony@some-uuid', }) @istest def svn_author_to_gitsvn_person_empty_person(self): """The empty person should see name, fullname and email filled. """ actual_person = converters.svn_author_to_gitsvn_person( '', repo_uuid=b'some-uuid') self.assertEqual(actual_person, { 'fullname': b'(no author) <(no author)@some-uuid>', 'name': b'(no author)', 'email': b'(no author)@some-uuid' }) class TestAuthorSWHConverters(unittest.TestCase): @istest def svn_author_to_swh_person(self): """The author should have name, email and fullname filled. """ actual_person = converters.svn_author_to_swh_person( 'tony ') self.assertEquals(actual_person, { 'fullname': b'tony ', 'name': b'tony', 'email': b'ynot@dagobah', }) @istest def svn_author_to_swh_person_no_email(self): """The author and fullname should be the same as the input (author). """ actual_person = converters.svn_author_to_swh_person('tony') self.assertEquals(actual_person, { 'fullname': b'tony', 'name': b'tony', 'email': None, }) @istest def svn_author_to_swh_person_empty_person(self): """Empty person has only its fullname filled with the empty byte-string. """ actual_person = converters.svn_author_to_swh_person('') self.assertEqual(actual_person, { 'fullname': b'', 'name': None, 'email': None, }) class TestSWHRevisionConverters(unittest.TestCase): @istest def build_swh_revision_default(self): """This should build the swh revision with the swh revision's extra headers about the repository. """ actual_swh_revision = converters.build_swh_revision( repo_uuid=b'uuid', dir_id='dir-id', commit={ 'author_name': { 'name': b'theo', 'email': b'theo@uuid', 'fullname': b'theo ' }, 'message': b'commit message', 'author_date': { - 'timestamp': 1088108379, + 'timestamp': { + 'seconds': 1088108379, + 'microseconds': 0, + }, 'offset': 0 } }, rev=10, parents=['123']) - date = {'timestamp': 1088108379, 'offset': 0} + date = { + 'timestamp': { + 'seconds': 1088108379, + 'microseconds': 0, + }, + 'offset': 0, + } self.assertEquals(actual_swh_revision, { 'date': date, 'committer_date': date, 'type': 'svn', 'directory': 'dir-id', 'message': b'commit message', 'author': { 'name': b'theo', 'email': b'theo@uuid', 'fullname': b'theo ' }, 'committer': { 'name': b'theo', 'email': b'theo@uuid', 'fullname': b'theo ' }, 'synthetic': True, 'metadata': { 'extra_headers': [ ['svn_repo_uuid', b'uuid'], ['svn_revision', b'10'], ] }, 'parents': ['123'], }) class TestGitSvnRevisionConverters(unittest.TestCase): @istest def build_gitsvn_swh_revision_default(self): """This should build the swh revision without the swh revision's extra headers about the repository. """ actual_swh_revision = converters.build_gitsvn_swh_revision( dir_id='dir-id', commit={ 'author_name': { 'name': b'theo', 'email': b'theo@uuid', 'fullname': b'theo ' }, 'message': b'commit message', 'author_date': { - 'timestamp': 1088108379, + 'timestamp': { + 'seconds': 1088108379, + 'microseconds': 0, + }, 'offset': 0 } }, rev=10, parents=['123']) - date = {'timestamp': 1088108379, 'offset': 0} + date = { + 'timestamp': { + 'seconds': 1088108379, + 'microseconds': 0, + }, + 'offset': 0, + } self.assertEquals(actual_swh_revision, { 'date': date, 'committer_date': date, 'type': 'svn', 'directory': 'dir-id', 'message': b'commit message', 'author': { 'name': b'theo', 'email': b'theo@uuid', 'fullname': b'theo ' }, 'committer': { 'name': b'theo', 'email': b'theo@uuid', 'fullname': b'theo ' }, 'synthetic': True, 'metadata': None, 'parents': ['123'], }) class TestSWHOccurrence(unittest.TestCase): @istest def build_swh_occurrence(self): actual_occ = converters.build_swh_occurrence('revision-id', 'origin-id', visit=10) self.assertEquals(actual_occ, { 'branch': 'master', 'target': 'revision-id', 'target_type': 'revision', 'origin': 'origin-id', 'visit': 10 }) class ConvertSWHDate(unittest.TestCase): @istest def svn_date_to_swh_date(self): """The timestamp should not be tampered with and include the decimals. """ self.assertEquals( - converters.svn_date_to_swh_date('2011-05-31T06:04:39.500900Z'), - { - 'timestamp': 1306821879.5009, + converters.svn_date_to_swh_date('2011-05-31T06:04:39.500900Z'), { + 'timestamp': { + 'seconds': 1306821879, + 'microseconds': 500900, + }, 'offset': 0 }) self.assertEquals( converters.svn_date_to_swh_date('2011-05-31T06:04:39.800722Z'), { - 'timestamp': 1306821879.800722, + 'timestamp': { + 'seconds': 1306821879, + 'microseconds': 800722, + }, 'offset': 0 }) @istest def svn_date_to_swh_date_epoch(self): """Empty date should be EPOCH (timestamp and offset at 0).""" # It should return 0, epoch - self.assertEquals({'timestamp': 0, 'offset': 0}, - converters.svn_date_to_swh_date('')) - self.assertEquals({'timestamp': 0, 'offset': 0}, - converters.svn_date_to_swh_date(None)) + self.assertEquals({ + 'timestamp': { + 'seconds': 0, + 'microseconds': 0, + }, + 'offset': 0, + }, converters.svn_date_to_swh_date('')) + self.assertEquals({ + 'timestamp': { + 'seconds': 0, + 'microseconds': 0, + }, 'offset': 0, + }, converters.svn_date_to_swh_date(None)) class ConvertGitSvnDate(unittest.TestCase): @istest def svn_date_to_gitsvn_date(self): """The timestamp should be truncated to be an integer.""" actual_ts = converters.svn_date_to_gitsvn_date( '2011-05-31T06:04:39.800722Z') - self.assertEquals(actual_ts, - {'timestamp': 1306821879, 'offset': 0}) + self.assertEquals(actual_ts, { + 'timestamp': { + 'seconds': 1306821879, + 'microseconds': 0, + }, + 'offset': 0, + }) @istest def svn_date_to_gitsvn_date_epoch(self): """Empty date should be EPOCH (timestamp and offset at 0).""" # It should return 0, epoch - self.assertEquals({'timestamp': 0, 'offset': 0}, - converters.svn_date_to_gitsvn_date('')) - self.assertEquals({'timestamp': 0, 'offset': 0}, - converters.svn_date_to_gitsvn_date(None)) + self.assertEquals({ + 'timestamp': { + 'seconds': 0, + 'microseconds': 0, + }, + 'offset': 0, + }, converters.svn_date_to_gitsvn_date('')) + self.assertEquals({ + 'timestamp': { + 'seconds': 0, + 'microseconds': 0, + }, + 'offset': 0, + }, converters.svn_date_to_gitsvn_date(None)) class ConvertSWHRevision(unittest.TestCase): @istest def loader_to_scheduler_revision(self): actual_rev = converters.loader_to_scheduler_revision({ 'parents': [b'e\n\xbe\xe9\xc0\x87y\xfeG\xf7\xcfG\x82h\xa8i\xe8\xfe\xe2\x13'], # noqa 'id': b'\xedd\x92w\xab\xb2\x16,\xea*\x90O8\x0f\x96/\xfb\xd4\x16`', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', b'bc7d6c17-68a5-4917-9c54-c565d7424229'], ['svn_revision', b'4'] ] } }) self.assertEquals(actual_rev, { 'id': 'ed649277abb2162cea2a904f380f962ffbd41660', 'parents': ['650abee9c08779fe47f7cf478268a869e8fee213'], 'metadata': { 'extra_headers': [ ['svn_repo_uuid', 'bc7d6c17-68a5-4917-9c54-c565d7424229'], ['svn_revision', '4'] ] } }) @istest def loader_to_scheduler_revision_none(self): self.assertIsNone(converters.loader_to_scheduler_revision(None)) @istest def scheduler_to_loader_revision(self): actual_rev = converters.scheduler_to_loader_revision({ 'id': 'ed649277abb2162cea2a904f380f962ffbd41660', 'parents': ['650abee9c08779fe47f7cf478268a869e8fee213'], 'metadata': { 'extra_headers': [ ['svn_repo_uuid', 'bc7d6c17-68a5-4917-9c54-c565d7424229'], ['svn_revision', '4'] ] } }) self.assertEquals(actual_rev, { 'parents': [b'e\n\xbe\xe9\xc0\x87y\xfeG\xf7\xcfG\x82h\xa8i\xe8\xfe\xe2\x13'], # noqa 'id': b'\xedd\x92w\xab\xb2\x16,\xea*\x90O8\x0f\x96/\xfb\xd4\x16`', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', 'bc7d6c17-68a5-4917-9c54-c565d7424229'], ['svn_revision', '4'] ] } }) @istest def scheduler_to_loader_revision_none(self): self.assertIsNone(converters.scheduler_to_loader_revision(None)) diff --git a/swh/loader/svn/tests/test_utils.py b/swh/loader/svn/tests/test_utils.py index 32a918a..f102966 100644 --- a/swh/loader/svn/tests/test_utils.py +++ b/swh/loader/svn/tests/test_utils.py @@ -1,189 +1,193 @@ # Copyright (C) 2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import unittest from nose.tools import istest from test_base import BaseTestTreeLoader from swh.loader.svn import utils from swh.model import git class TestUtils(unittest.TestCase): @istest def strdate_to_timestamp(self): """Formatted string date should be converted in timestamp.""" actual_ts = utils.strdate_to_timestamp('2011-05-31T06:04:39.800722Z') - self.assertEquals(actual_ts, 1306821879.800722) + self.assertEquals(actual_ts, {'seconds': 1306821879, + 'microseconds': 800722}) actual_ts = utils.strdate_to_timestamp('2011-05-31T06:03:39.123450Z') - self.assertEquals(actual_ts, 1306821819.12345) + self.assertEquals(actual_ts, {'seconds': 1306821819, + 'microseconds': 123450}) @istest def strdate_to_timestamp_empty_does_not_break(self): """Empty or None date should be timestamp 0.""" - self.assertEquals(0, utils.strdate_to_timestamp('')) - self.assertEquals(0, utils.strdate_to_timestamp(None)) + self.assertEquals({'seconds': 0, 'microseconds': 0}, + utils.strdate_to_timestamp('')) + self.assertEquals({'seconds': 0, 'microseconds': 0}, + utils.strdate_to_timestamp(None)) class TestHashesConvert(unittest.TestCase): def setUp(self): self.hashes = { b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox': { 'checksums': { 'name': b'pkg-fox', 'sha1_git': b'\xad\xdf2x\x1fBX\xdb\xe8Adt\xc9\xf5~\xcb6\x98^\xbf', # noqa 'path': b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox' }, 'children': { b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/fox-1.2', b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/fox-1.4' } }, b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/fox-1.4': { 'checksums': 'something', 'children': set() }, b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/fox-1.2': { 'checksums': 'something' }, b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/fox-1.3': { 'checksums': 'or something', 'children': { b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/some/path' } } } self.expected_output = { b'': { 'checksums': { 'name': b'pkg-fox', 'sha1_git': b'\xad\xdf2x\x1fBX\xdb\xe8Adt\xc9\xf5~\xcb6\x98^\xbf', # noqa 'path': b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox' }, 'children': { b'fox-1.2', b'fox-1.4' } }, b'fox-1.4': { 'checksums': 'something', 'children': set() }, b'fox-1.2': { 'checksums': 'something', }, b'fox-1.3': { 'checksums': 'or something', 'children': { b'some/path' } } } @istest def convert_hashes_with_relative_path(self): actual_output = utils.convert_hashes_with_relative_path( self.hashes, b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox') self.assertEquals(actual_output, self.expected_output) @istest def convert_hashes_with_relative_path_with_slash(self): actual_output = utils.convert_hashes_with_relative_path( self.hashes, b'/tmp/tmp.c39vkrp1.swh.loader/pkg-fox/') self.assertEquals(actual_output, self.expected_output) class HashtreeITTest(BaseTestTreeLoader): @istest def hashtree_not_existing_path(self): # path does not exist with self.assertRaises(ValueError): utils.hashtree('/not/exists', ignore_empty_folder=False) @istest def hashtree_not_a_dir(self): fpath = '/tmp/foobar' with open(fpath, 'wb') as f: f.write(b'foo') # path is not a folder with self.assertRaises(ValueError): utils.hashtree(fpath, ignore_empty_folder=True) os.unlink(fpath) @istest def hashtree_with_empty_folder(self): # not ignoring empty folder # no pattern to ignore # this is the base case root_hash = self.tmp_root_path.encode('utf-8') actual_hash = utils.hashtree(root_hash, ignore_empty_folder=False) expected_hashes = git.compute_hashes_from_directory( self.tmp_root_path.encode('utf-8')) expected_hash = expected_hashes[root_hash]['checksums']['sha1_git'] self.assertEquals(actual_hash['sha1_git'], expected_hash) @istest def hashtree_ignore_pattern_with_empty_folder(self): # not ignoring empty folder # 'empty-folder' pattern to ignore root_hash = self.tmp_root_path.encode('utf-8') actual_hash = utils.hashtree(root_hash, ignore_empty_folder=False, ignore=['empty-folder']) expected_hashes = git.compute_hashes_from_directory( self.tmp_root_path.encode('utf-8'), dir_ok_fn=lambda dp: b'empty-folder' not in dp) expected_hash = expected_hashes[root_hash]['checksums']['sha1_git'] self.assertEquals(actual_hash['sha1_git'], expected_hash) @istest def hashtree_ignore_pattern_no_empty_folder(self): # ignoring empty folder # '/barfoo/' pattern to ignore root_hash = self.tmp_root_path.encode('utf-8') actual_hash = utils.hashtree(root_hash, ignore_empty_folder=True, ignore=['/barfoo/']) def ignore_fn(dp): return b'/barfoo/' not in dp expected_hashes = git.compute_hashes_from_directory( self.tmp_root_path.encode('utf-8'), dir_ok_fn=ignore_fn, remove_empty_folder=True) expected_hash = expected_hashes[root_hash]['checksums']['sha1_git'] self.assertEquals(actual_hash['sha1_git'], expected_hash) @istest def hashtree_no_ignore_pattern_no_empty_folder(self): # ignoring empty folder root_hash = self.tmp_root_path.encode('utf-8') actual_hash = utils.hashtree(root_hash, ignore_empty_folder=True) expected_hashes = git.compute_hashes_from_directory( self.tmp_root_path.encode('utf-8'), remove_empty_folder=True) expected_hash = expected_hashes[root_hash]['checksums']['sha1_git'] self.assertEquals(actual_hash['sha1_git'], expected_hash) diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py index 354cead..e82db15 100644 --- a/swh/loader/svn/utils.py +++ b/swh/loader/svn/utils.py @@ -1,175 +1,178 @@ # Copyright (C) 2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import tempfile import shutil from dateutil import parser from subprocess import PIPE, Popen, call from swh.model import git def strdate_to_timestamp(strdate): """Convert a string date to an int timestamp. Args: strdate: A string representing a date with format like 'YYYY-mm-DDTHH:MM:SS.800722Z' Returns: - A timestamp in float + A couple of integers: seconds, microseconds """ if strdate: dt = parser.parse(strdate) - ts_float = dt.timestamp() + ts = { + 'seconds': int(dt.timestamp()), + 'microseconds': dt.microsecond, + } else: # epoch - ts_float = 0 - return ts_float + ts = {'seconds': 0, 'microseconds': 0} + return ts def convert_hashes_with_relative_path(hashes, rootpath): """A function to ease the transformation of absolute path to relative ones. This is an implementation detail: - swh.loader.svn.ra compute hashes and store keys with relative paths - swh.model.git compute hashes and store keys with full paths """ if rootpath.endswith(b'/'): rootpath = rootpath[:-1] root_value = hashes.pop(rootpath) if not rootpath.endswith(b'/'): rootpath = rootpath + b'/' def _replace_slash(s, rootpath=rootpath): return s.replace(rootpath, b'') def _update_children(children): return set((_replace_slash(c) for c in children)) h = { b'': { 'checksums': root_value['checksums'], 'children': _update_children(root_value['children']) } } for path, v in hashes.items(): p = _replace_slash(path) if 'children' in v: v['children'] = _update_children(v['children']) h[p] = v return h def hashtree(path, ignore_empty_folder=False, ignore=None): """Given a path and options, compute the hash's upper tree. This is not for production use. It's merely a helper function used mainly in bin/swh-hashtree Args: - path: The path to hash - ignore_empty_folder: An option to ignore empty folder - ignore: An option to ignore patterns in directory names. Returns: The path's checksums respecting the options passed as parameters. """ if os.path.exists(path): if not os.path.isdir(path): raise ValueError('%s should be a directory!' % path) else: raise ValueError('%s should exist!' % path) if isinstance(path, str): path = path.encode('utf-8') if ignore: patterns = [] for exc in ignore: patterns.append(exc.encode('utf-8')) def dir_ok_fn_basic(dirpath, patterns=patterns): dname = os.path.basename(dirpath) for pattern_to_ignore in patterns: if pattern_to_ignore == dname: return False if (pattern_to_ignore + b'/') in dirpath: return False return True if ignore_empty_folder: def dir_ok_fn(dirpath, patterns=patterns): if not dir_ok_fn_basic(dirpath): return False return os.listdir(dirpath) != [] else: dir_ok_fn = dir_ok_fn_basic else: if ignore_empty_folder: def dir_ok_fn(dirpath): return os.listdir(dirpath) != [] else: dir_ok_fn = git.default_validation_dir objects = git.compute_hashes_from_directory( path, dir_ok_fn=dir_ok_fn) h = objects[path]['checksums'] return h def init_svn_repo_from_archive_dump(archive_path, root_temp_dir='/tmp'): """Given a path to an archive containing an svn dump. Initialize an svn repository with the content of said dump. Returns: A tuple: - temporary folder: containing the mounted repository - repo_path, path to the mounted repository inside the temporary folder Raises: ValueError in case of failure to run the command to uncompress and load the dump. """ project_name = os.path.basename(os.path.dirname(archive_path)) temp_dir = tempfile.mkdtemp(suffix='.swh.loader.svn', prefix='tmp.', dir=root_temp_dir) try: repo_path = os.path.join(temp_dir, project_name) # create the repository that will be loaded with the dump cmd = ['svnadmin', 'create', repo_path] r = call(cmd) if r != 0: raise ValueError( 'Failed to initialize empty svn repo for %s' % project_name) with Popen(['pigz', '-dc', archive_path], stdout=PIPE) as dump: cmd = ['svnadmin', 'load', '-q', repo_path] r = call(cmd, stdin=dump.stdout) if r != 0: raise ValueError( 'Failed to mount the svn dump for project %s' % project_name) return temp_dir, repo_path except Exception as e: shutil.rmtree(temp_dir) raise e