Give `ignore_empty_folder` a default value of False in both `main`
(bin/swh-hashtree) and `hashtree` (swh/loader/svn/utils.py).

NOTE(review): this patch was restored from a whitespace-collapsed copy.
Blank lines, wrap points and indentation were reconstructed to match the
hunk line counts; verify against the repository before applying.

diff --git a/bin/swh-hashtree b/bin/swh-hashtree
index 644b862..bcc691b 100755
--- a/bin/swh-hashtree
+++ b/bin/swh-hashtree
@@ -1,34 +1,34 @@
 #!/usr/bin/env python3
 
 # Use sample:
 # swh-hashtree --path . --ignore '.svn' --ignore '.git-svn' \
 # --ignore-empty-folders
 # 38f8d2c3a951f6b94007896d0981077e48bbd702
 
 
 import click
 
 from swh.core import hashutil
 
 from swh.loader.svn.utils import hashtree
 
 
 @click.command()
 @click.option('--path', default='.', help='Optional path to hash.')
 @click.option('--ignore-empty-folder', is_flag=True, default=False,
               help='Ignore empty folder.')
 @click.option('--ignore', multiple=True,
               help='Ignore pattern.')
-def main(path, ignore_empty_folder, ignore=None):
+def main(path, ignore_empty_folder=False, ignore=None):
     try:
         h = hashtree(path, ignore_empty_folder, ignore)
     except Exception as e:
         print(e)
         return
     else:
         if h:
             print(hashutil.hash_to_hex(h['sha1_git']))
 
 
 if __name__ == '__main__':
     main()
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
index b2cc042..22842fa 100644
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,128 +1,128 @@
 # Copyright (C) 2016 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 
 from dateutil import parser
 
 from swh.model import git
 
 
 def strdate_to_timestamp(strdate):
     """Convert a string date to an int timestamp.
 
     Args:
         strdate: A string representing a date with format like
         'YYYY-mm-DDTHH:MM:SS.800722Z'
 
     Returns:
         A timestamp in float
 
     """
     if strdate:
         dt = parser.parse(strdate)
         ts_float = dt.timestamp()
     else:  # epoch
         ts_float = 0
 
     return ts_float
 
 
 def convert_hashes_with_relative_path(hashes, rootpath):
     """A function to ease the transformation of absolute path to relative ones.
 
     This is an implementation detail:
     - swh.loader.svn.ra compute hashes and store keys with relative paths
     - swh.model.git compute hashes and store keys with full paths
 
     """
     if rootpath.endswith(b'/'):
         rootpath = rootpath[:-1]
 
     root_value = hashes.pop(rootpath)
 
     if not rootpath.endswith(b'/'):
         rootpath = rootpath + b'/'
 
     def _replace_slash(s, rootpath=rootpath):
         return s.replace(rootpath, b'')
 
     def _update_children(children):
         return set((_replace_slash(c) for c in children))
 
     h = {
         b'': {
             'checksums': root_value['checksums'],
             'children': _update_children(root_value['children'])
         }
     }
     for path, v in hashes.items():
         p = _replace_slash(path)
         if 'children' in v:
             v['children'] = _update_children(v['children'])
         h[p] = v
 
     return h
 
 
-def hashtree(path, ignore_empty_folder, ignore=None):
+def hashtree(path, ignore_empty_folder=False, ignore=None):
     """Given a path and options, compute the hash's upper tree.
 
     This is not for production use.
     It's merely a helper function used mainly in bin/swh-hashtree
 
     Args:
         - path: The path to hash
         - ignore_empty_folder: An option to ignore empty folder
         - ignore: An option to ignore patterns in directory names.
 
     Returns:
         The path's checksums respecting the options passed as parameters.
 
     """
     if os.path.exists(path):
         if not os.path.isdir(path):
             raise ValueError('%s should be a directory!' % path)
     else:
         raise ValueError('%s should exist!' % path)
 
     if isinstance(path, str):
         path = path.encode('utf-8')
 
     if ignore:
         patterns = []
         for exc in ignore:
             patterns.append(exc.encode('utf-8'))
 
         def dir_ok_fn_basic(dirpath, patterns=patterns):
             dname = os.path.basename(dirpath)
             for pattern_to_ignore in patterns:
                 if pattern_to_ignore == dname:
                     return False
                 if (pattern_to_ignore + b'/') in dirpath:
                     return False
             return True
 
         if ignore_empty_folder:
             def dir_ok_fn(dirpath, patterns=patterns):
                 if not dir_ok_fn_basic(dirpath):
                     return False
                 return os.listdir(dirpath) != []
         else:
             dir_ok_fn = dir_ok_fn_basic
     else:
         if ignore_empty_folder:
             def dir_ok_fn(dirpath):
                 return os.listdir(dirpath) != []
         else:
             dir_ok_fn = git.default_validation_dir
 
     objects = git.compute_hashes_from_directory(
         path,
         dir_ok_fn=dir_ok_fn)
 
     h = objects[path]['checksums']
 
     return h