diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -8,6 +8,9 @@
 [mypy-django.*] # false positive, only used my hypotesis' extras
 ignore_missing_imports = True
 
+[mypy-dulwich.*] # imported by swh.model.cli to identify snapshots
+ignore_missing_imports = True
+
 [mypy-pkg_resources.*]
 ignore_missing_imports = True
 
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@
 attrs
 hypothesis
 python-dateutil
+dulwich
diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -4,12 +4,14 @@
 # See top-level LICENSE file for more information
 
 import click
+import dulwich.repo
 import os
 import sys
 
 from functools import partial
 from urllib.parse import urlparse
 
+from swh.model import hashutil
 from swh.model import identifiers as pids
 from swh.model.exceptions import ValidationError
 from swh.model.from_disk import Content, Directory
@@ -17,6 +19,15 @@
 
 CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
 
+# Mapping between dulwich types and Software Heritage ones. Used by snapshot ID
+# computation.
+_DULWICH_TYPES = {
+    b'blob': 'content',
+    b'tree': 'directory',
+    b'commit': 'revision',
+    b'tag': 'release',
+}
+
 
 class PidParamType(click.ParamType):
     name = 'persistent identifier'
@@ -45,6 +56,43 @@
     return str(pid)
 
 
+def pid_of_git_repo(path):
+    """Compute the snapshot persistent identifier of a git repository.
+
+    Every ref returned by the repository's refs.as_dict() becomes a snapshot
+    branch whose target type is looked up in _DULWICH_TYPES. A ref whose
+    target object cannot be loaded from the repository is recorded as None.
+
+    Args:
+        path: location of the git repository, as accepted by
+            dulwich.repo.Repo
+
+    Returns:
+        the snapshot PID, as a string
+
+    """
+    repo = dulwich.repo.Repo(path)
+
+    branches = {}
+    for ref, target in repo.refs.as_dict().items():
+        try:
+            obj = repo[target]
+        except KeyError:
+            # dulwich raises KeyError for an object missing from the
+            # repository: keep the branch, but without a target
+            branches[ref] = None
+        else:
+            branches[ref] = {
+                'target': hashutil.bytehex_to_hash(target),
+                'target_type': _DULWICH_TYPES[obj.type_name],
+            }
+    snapshot = {'branches': branches}
+
+    pid = pids.PersistentId(object_type='snapshot',
+                            object_id=pids.snapshot_identifier(snapshot))
+    return str(pid)
+
+
 def identify_object(obj_type, follow_symlinks, obj):
     if obj_type == 'auto':
         if os.path.isfile(obj):
@@ -73,6 +121,8 @@
             pid = pid_of_dir(path)
     elif obj_type == 'origin':
         pid = pid_of_origin(obj)
+    elif obj_type == 'snapshot':
+        pid = pid_of_git_repo(obj)
     else:  # shouldn't happen, due to option validation
         raise click.BadParameter('invalid object type: ' + obj_type)
 
@@ -89,7 +139,8 @@
 @click.option('--filename/--no-filename', 'show_filename', default=True,
               help='show/hide file name (default: show)')
 @click.option('--type', '-t', 'obj_type', default='auto',
-              type=click.Choice(['auto', 'content', 'directory', 'origin']),
+              type=click.Choice(['auto', 'content', 'directory', 'origin',
+                                 'snapshot']),
               help='type of object to identify (default: auto)')
 @click.option('--verify', '-v', metavar='PID', type=PidParamType(),
               help='reference identifier to be compared with computed one')
@@ -116,7 +167,12 @@
       $ swh identify --no-filename /usr/src/linux/kernel/
       swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab
 
-    """
+    \b
+      $ git clone --mirror https://forge.softwareheritage.org/source/helloworld.git
+      $ swh identify --type snapshot helloworld.git/
+      swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git
+
+    """  # NoQA  # overlong lines in shell examples are fine
     if verify and len(objects) != 1:
         raise click.BadParameter('verification requires a single object')
 
diff --git a/swh/model/tests/data/repos/sample-repo.tgz b/swh/model/tests/data/repos/sample-repo.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@