Changeset View
Changeset View
Standalone View
Standalone View
swh/model/cli.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import click | import click | ||||
import dulwich.repo | |||||
import os | import os | ||||
import sys | import sys | ||||
from functools import partial | from functools import partial | ||||
from urllib.parse import urlparse | from urllib.parse import urlparse | ||||
from swh.model import hashutil | |||||
from swh.model import identifiers as pids | from swh.model import identifiers as pids | ||||
from swh.model.exceptions import ValidationError | from swh.model.exceptions import ValidationError | ||||
from swh.model.from_disk import Content, Directory | from swh.model.from_disk import Content, Directory | ||||
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) | CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) | ||||
# Mapping between dulwich types and Software Heritage ones. Used by snapshot ID | |||||
# computation. | |||||
_DULWICH_TYPES = { | |||||
b'blob': 'content', | |||||
b'tree': 'directory', | |||||
b'commit': 'revision', | |||||
b'tag': 'release', | |||||
} | |||||
class PidParamType(click.ParamType): | class PidParamType(click.ParamType): | ||||
name = 'persistent identifier' | name = 'persistent identifier' | ||||
def convert(self, value, param, ctx): | def convert(self, value, param, ctx): | ||||
try: | try: | ||||
pids.parse_persistent_identifier(value) | pids.parse_persistent_identifier(value) | ||||
return value # return as string, as we need just that | return value # return as string, as we need just that | ||||
Show All 12 Lines | |||||
def pid_of_origin(url): | def pid_of_origin(url): | ||||
pid = pids.PersistentId(object_type='origin', | pid = pids.PersistentId(object_type='origin', | ||||
object_id=pids.origin_identifier({'url': url})) | object_id=pids.origin_identifier({'url': url})) | ||||
return str(pid) | return str(pid) | ||||
def pid_of_git_repo(path): | |||||
repo = dulwich.repo.Repo(path) | |||||
branches = {} | |||||
for ref, target in repo.refs.as_dict().items(): | |||||
obj = repo[target] | |||||
if obj: | |||||
branches[ref] = { | |||||
'target': hashutil.bytehex_to_hash(target), | |||||
'target_type': _DULWICH_TYPES[obj.type_name], | |||||
} | |||||
else: | |||||
branches[ref] = None | |||||
snapshot = {'branches': branches} | |||||
pid = pids.PersistentId(object_type='snapshot', | |||||
object_id=pids.snapshot_identifier(snapshot)) | |||||
return str(pid) | |||||
def identify_object(obj_type, follow_symlinks, obj): | def identify_object(obj_type, follow_symlinks, obj): | ||||
if obj_type == 'auto': | if obj_type == 'auto': | ||||
if os.path.isfile(obj): | if os.path.isfile(obj): | ||||
obj_type = 'content' | obj_type = 'content' | ||||
elif os.path.isdir(obj): | elif os.path.isdir(obj): | ||||
obj_type = 'directory' | obj_type = 'directory' | ||||
else: | else: | ||||
try: # URL parsing | try: # URL parsing | ||||
Show All 12 Lines | if obj_type in ['content', 'directory']: | ||||
if follow_symlinks and os.path.islink(obj): | if follow_symlinks and os.path.islink(obj): | ||||
path = os.path.realpath(obj) | path = os.path.realpath(obj) | ||||
if obj_type == 'content': | if obj_type == 'content': | ||||
pid = pid_of_file(path) | pid = pid_of_file(path) | ||||
elif obj_type == 'directory': | elif obj_type == 'directory': | ||||
pid = pid_of_dir(path) | pid = pid_of_dir(path) | ||||
elif obj_type == 'origin': | elif obj_type == 'origin': | ||||
pid = pid_of_origin(obj) | pid = pid_of_origin(obj) | ||||
elif obj_type == 'snapshot': | |||||
pid = pid_of_git_repo(obj) | |||||
else: # shouldn't happen, due to option validation | else: # shouldn't happen, due to option validation | ||||
raise click.BadParameter('invalid object type: ' + obj_type) | raise click.BadParameter('invalid object type: ' + obj_type) | ||||
# note: we return original obj instead of path here, to preserve user-given | # note: we return original obj instead of path here, to preserve user-given | ||||
# file name in output | # file name in output | ||||
return (obj, pid) | return (obj, pid) | ||||
@click.command(context_settings=CONTEXT_SETTINGS) | @click.command(context_settings=CONTEXT_SETTINGS) | ||||
@click.option('--dereference/--no-dereference', 'follow_symlinks', | @click.option('--dereference/--no-dereference', 'follow_symlinks', | ||||
default=True, | default=True, | ||||
help='follow (or not) symlinks for OBJECTS passed as arguments ' | help='follow (or not) symlinks for OBJECTS passed as arguments ' | ||||
+ '(default: follow)') | + '(default: follow)') | ||||
@click.option('--filename/--no-filename', 'show_filename', default=True, | @click.option('--filename/--no-filename', 'show_filename', default=True, | ||||
help='show/hide file name (default: show)') | help='show/hide file name (default: show)') | ||||
@click.option('--type', '-t', 'obj_type', default='auto', | @click.option('--type', '-t', 'obj_type', default='auto', | ||||
type=click.Choice(['auto', 'content', 'directory', 'origin']), | type=click.Choice(['auto', 'content', 'directory', 'origin', | ||||
'snapshot']), | |||||
help='type of object to identify (default: auto)') | help='type of object to identify (default: auto)') | ||||
@click.option('--verify', '-v', metavar='PID', type=PidParamType(), | @click.option('--verify', '-v', metavar='PID', type=PidParamType(), | ||||
help='reference identifier to be compared with computed one') | help='reference identifier to be compared with computed one') | ||||
@click.argument('objects', nargs=-1, required=True) | @click.argument('objects', nargs=-1, required=True) | ||||
def identify(obj_type, verify, show_filename, follow_symlinks, objects): | def identify(obj_type, verify, show_filename, follow_symlinks, objects): | ||||
"""Compute the Software Heritage persistent identifier (PID) for the given | """Compute the Software Heritage persistent identifier (PID) for the given | ||||
source code object(s). | source code object(s). | ||||
Show All 10 Lines | \b | ||||
swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c | swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c | ||||
swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c | swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c | ||||
swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c | swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c | ||||
\b | \b | ||||
$ swh identify --no-filename /usr/src/linux/kernel/ | $ swh identify --no-filename /usr/src/linux/kernel/ | ||||
swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab | swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab | ||||
""" | \b | ||||
$ git clone --mirror https://forge.softwareheritage.org/source/helloworld.git | |||||
$ swh identify --type snapshot helloworld.git/ | |||||
swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git | |||||
""" # NoQA # overlong lines in shell examples are fine | |||||
if verify and len(objects) != 1: | if verify and len(objects) != 1: | ||||
raise click.BadParameter('verification requires a single object') | raise click.BadParameter('verification requires a single object') | ||||
results = map(partial(identify_object, obj_type, follow_symlinks), objects) | results = map(partial(identify_object, obj_type, follow_symlinks), objects) | ||||
if verify: | if verify: | ||||
pid = next(results)[1] | pid = next(results)[1] | ||||
if verify == pid: | if verify == pid: | ||||
Show All 15 Lines |