diff --git a/pytest.ini b/pytest.ini index 7a1413f..9fa2d75 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = --doctest-modules +addopts = --doctest-modules -p no:pytest_swh_core norecursedirs = docs .* markers = fs: tests that involve filesystem ios diff --git a/requirements-cli.txt b/requirements-cli.txt index 7365d1f..8564d00 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,3 +1,3 @@ -swh.core +swh.core >= 0.3 Click dulwich diff --git a/setup.py b/setup.py index f42058c..8f9d32f 100755 --- a/setup.py +++ b/setup.py @@ -1,79 +1,79 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import open from os import path from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements blake2_requirements = ['pyblake2;python_version<"3.6"'] setup( name="swh.model", description="Software Heritage data model", long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DMOD/", packages=find_packages(), setup_requires=["setuptools-scm"], use_scm_version=True, install_requires=( parse_requirements() + parse_requirements("swh") + 
blake2_requirements ), extras_require={ "cli": parse_requirements("cli"), - "testing": parse_requirements("test"), + "testing": parse_requirements("test") + parse_requirements("cli"), }, include_package_data=True, entry_points=""" [console_scripts] swh-identify=swh.model.cli:identify [swh.cli.subcommands] - identify=swh.model.cli:identify + identify=swh.model.cli """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-model", "Documentation": "https://docs.softwareheritage.org/devel/swh-model/", }, ) diff --git a/swh/model/cli.py b/swh/model/cli.py index 68a3680..4c8b7c1 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -1,225 +1,227 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import sys # WARNING: do not import unnecessary things here to keep cli startup time under # control import click +from swh.core.cli import swh as swh_cli_group + CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) # Mapping between dulwich types and Software Heritage ones. Used by snapshot ID # computation. 
_DULWICH_TYPES = {
    b"blob": "content",
    b"tree": "directory",
    b"commit": "revision",
    b"tag": "release",
}


class SWHIDParamType(click.ParamType):
    """Click parameter type accepting only syntactically valid SWHIDs."""

    name = "persistent identifier"

    def convert(self, value, param, ctx):
        """Validate ``value`` with ``parse_swhid`` and return it unchanged.

        The value is deliberately returned as the original string (not as a
        parsed object): callers only compare it with a computed SWHID string.
        A ``ValidationError`` from the parser is reported through
        ``self.fail``.
        """
        from swh.model.exceptions import ValidationError
        from swh.model.identifiers import parse_swhid

        try:
            parse_swhid(value)
            return value  # return as string, as we need just that
        except ValidationError as e:
            self.fail("%s is not a valid SWHID. %s." % (value, e), param, ctx)


def swhid_of_file(path):
    """Return the SWHID of the content object built from the file at ``path``."""
    from swh.model.from_disk import Content
    from swh.model.identifiers import CONTENT, swhid

    content = Content.from_file(path=path).get_data()
    return swhid(CONTENT, content)


def swhid_of_file_content(data):
    """Return the SWHID of a content object built from the bytes ``data``."""
    from swh.model.from_disk import Content
    from swh.model.identifiers import CONTENT, swhid

    # Permission bits are octal: the decimal literal 644 is 0o1204, which is
    # not the regular-file mode that was intended here.
    content = Content.from_bytes(mode=0o644, data=data).get_data()
    return swhid(CONTENT, content)


def swhid_of_dir(path):
    """Return the SWHID of the directory object built from ``path``."""
    from swh.model.from_disk import Directory
    from swh.model.identifiers import DIRECTORY, swhid

    directory = Directory.from_disk(path=path).get_data()
    return swhid(DIRECTORY, directory)


def swhid_of_origin(url):
    """Return, as a string, the SWHID of the origin identified by ``url``."""
    from swh.model.identifiers import SWHID, origin_identifier

    return str(SWHID(object_type="origin", object_id=origin_identifier({"url": url})))


def swhid_of_git_repo(path):
    """Return, as a string, the snapshot SWHID of the git repository at ``path``.

    Every reference of the repository becomes a branch of the snapshot.
    Symbolic references are then recorded as ``alias`` branches, overriding
    the entry created for the same name in the first pass.
    """
    import dulwich.repo

    from swh.model import hashutil
    from swh.model.identifiers import SWHID, snapshot_identifier

    repo = dulwich.repo.Repo(path)

    branches = {}
    for ref, target in repo.refs.as_dict().items():
        obj = repo[target]
        if obj:
            branches[ref] = {
                "target": hashutil.bytehex_to_hash(target),
                "target_type": _DULWICH_TYPES[obj.type_name],
            }
        else:
            branches[ref] = None

    for ref, target in repo.refs.get_symrefs().items():
        branches[ref] = {
            "target": target,
            "target_type": "alias",
        }

    snapshot = {"branches": branches}

    return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot)))


def identify_object(obj_type, follow_symlinks, obj):
    """Compute the SWHID of a single object given on the command line.

    Args:
        obj_type: one of "auto", "content", "directory", "origin" or
            "snapshot". With "auto" the type is guessed: "-" or an existing
            file is a content, an existing directory is a directory, and
            anything that parses as a URL with a scheme is an origin.
        follow_symlinks: when true and ``obj`` is a symbolic link, identify
            the link target instead of the link itself.
        obj: the object as typed by the user (path, URL, or "-" for the
            content of standard input).

    Returns:
        A ``(obj, swhid)`` pair, where ``obj`` is the argument as given.

    Raises:
        click.BadParameter: if the type cannot be detected or is unknown.
    """
    from urllib.parse import urlparse

    if obj_type == "auto":
        if obj == "-" or os.path.isfile(obj):
            obj_type = "content"
        elif os.path.isdir(obj):
            obj_type = "directory"
        else:
            try:  # URL parsing
                has_scheme = bool(urlparse(obj).scheme)
            except ValueError:
                has_scheme = False
            if not has_scheme:
                raise click.BadParameter("cannot detect object type for %s" % obj)
            obj_type = "origin"

    if obj == "-":
        swhid = swhid_of_file_content(sys.stdin.buffer.read())
    elif obj_type in ("content", "directory"):
        # os.fsencode is the inverse of the os.fsdecode used when printing
        # results, so it also copes with surrogate-escaped (undecodable)
        # file names. The path stays in bytes even after resolving a
        # symlink, so the helpers below always receive the same type.
        path = os.fsencode(obj)
        if follow_symlinks and os.path.islink(path):
            path = os.path.realpath(path)
        if obj_type == "content":
            swhid = swhid_of_file(path)
        else:
            swhid = swhid_of_dir(path)
    elif obj_type == "origin":
        swhid = swhid_of_origin(obj)
    elif obj_type == "snapshot":
        swhid = swhid_of_git_repo(obj)
    else:  # shouldn't happen, due to option validation
        raise click.BadParameter("invalid object type: " + obj_type)

    # note: we return original obj instead of path here, to preserve user-given
    # file name in output
    return (obj, swhid)


@swh_cli_group.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "--dereference/--no-dereference",
    "follow_symlinks",
    default=True,
    help="follow (or not) symlinks for OBJECTS passed as arguments "
    + "(default: follow)",
)
@click.option(
    "--filename/--no-filename",
    "show_filename",
    default=True,
    help="show/hide file name (default: show)",
)
@click.option(
    "--type",
    "-t",
    "obj_type",
    default="auto",
    type=click.Choice(["auto", "content", "directory", "origin", "snapshot"]),
    help="type of object to identify (default: auto)",
)
@click.option(
    "--verify",
    "-v",
    metavar="SWHID",
    type=SWHIDParamType(),
    help="reference identifier to be compared with computed one",
)
@click.argument("objects", nargs=-1, required=True)
def identify(obj_type, verify, show_filename, follow_symlinks, objects):
    """Compute the Software Heritage persistent identifier (SWHID) for the given
    source code object(s).

    For more details about SWHIDs see:

    \b
    https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html

    Tip: you can pass "-" to identify the content of standard input.

    \b
    Examples:

    \b
      $ swh identify fork.c kmod.c sched/deadline.c
      swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3    fork.c
      swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2    kmod.c
      swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82    sched/deadline.c

    \b
      $ swh identify --no-filename /usr/src/linux/kernel/
      swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab

    \b
      $ git clone --mirror https://forge.softwareheritage.org/source/helloworld.git
      $ swh identify --type snapshot helloworld.git/
      swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93    helloworld.git

    """  # NoQA  # overlong lines in shell examples are fine
    from functools import partial

    if verify and len(objects) != 1:
        raise click.BadParameter("verification requires a single object")

    # Lazy on purpose: each object is identified only when its result is
    # consumed below.
    results = map(partial(identify_object, obj_type, follow_symlinks), objects)

    if verify:
        swhid = next(results)[1]
        if verify == swhid:
            click.echo("SWHID match: %s" % swhid)
            sys.exit(0)
        else:
            click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
            sys.exit(1)
    else:
        for (obj, swhid) in results:
            msg = swhid
            if show_filename:
                msg = "%s\t%s" % (swhid, os.fsdecode(obj))
            click.echo(msg)


if __name__ == "__main__":
    identify()