Changeset View
Standalone View
swh/model/cli.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from enum import Enum | |||||
import os | import os | ||||
from pathlib import Path | |||||
import sys | import sys | ||||
from typing import Dict, List, Optional | from typing import Dict, List, Optional | ||||
zack: is this interleaving of from/import/from/import for modules all coming from the stdlib ok w.r.t. | |||||
# WARNING: do not import unnecessary things here to keep cli startup time under | # WARNING: do not import unnecessary things here to keep cli startup time under | ||||
# control | # control | ||||
import click | import click | ||||
from swh.core.cli import swh as swh_cli_group | from swh.core.cli import swh as swh_cli_group | ||||
from swh.model.identifiers import CoreSWHID, ObjectType | from swh.model.identifiers import CoreSWHID, ObjectType | ||||
▲ Show 20 Lines • Show All 148 Lines • ▼ Show 20 Lines | def identify_object(obj_type, follow_symlinks, exclude_patterns, obj) -> str: | ||||
else: # shouldn't happen, due to option validation | else: # shouldn't happen, due to option validation | ||||
raise click.BadParameter("invalid object type: " + obj_type) | raise click.BadParameter("invalid object type: " + obj_type) | ||||
# note: we return original obj instead of path here, to preserve user-given | # note: we return original obj instead of path here, to preserve user-given | ||||
# file name in output | # file name in output | ||||
return swhid | return swhid | ||||
class Color(Enum): | |||||
zackAuthorUnsubmitted Done Inline Actionsthis should be called something like "TerminalColor" or "ColorEscapes", it's not about colors in general zack: this should be called something like "TerminalColor" or "ColorEscapes", it's not about colors… | |||||
blue = "\033[94m" | |||||
green = "\033[92m" | |||||
end = "\033[0m" | |||||
zackAuthorUnsubmitted Done Inline Actionsenum values should be all uppercase zack: enum values should be all uppercase | |||||
def colorize(text, color): | |||||
zackAuthorUnsubmitted Done Inline Actionsmake this function a class method of the Enum zack: make this function a class method of the Enum | |||||
return color.value + text + color.end.value | |||||
ardumontUnsubmitted Done Inline ActionsMake the coloring output an option, the option should be off by default. ardumont: Make the coloring output an option, the option should be off by default.
If we want to pipe the… | |||||
zackAuthorUnsubmitted Done Inline ActionsAgreed. Unless you want to go the extra mile and behave like most git commands do. That is: detect if output is being piped to another command. If it is, disable coloring by default, but allow override with a flag. If it isn't being piped, enable coloring by default, allowing override with a flag. zack: Agreed.
Unless you want to go the extra mile and behave like most git commands do. That is… | |||||
KShivenduUnsubmitted Done Inline ActionsThat's a great idea. I will do it. KShivendu: That's a great idea. I will do it. | |||||
@swh_cli_group.command(context_settings=CONTEXT_SETTINGS) | @swh_cli_group.command(context_settings=CONTEXT_SETTINGS) | ||||
@click.option( | @click.option( | ||||
"--dereference/--no-dereference", | "--dereference/--no-dereference", | ||||
"follow_symlinks", | "follow_symlinks", | ||||
default=True, | default=True, | ||||
help="follow (or not) symlinks for OBJECTS passed as arguments " | help="follow (or not) symlinks for OBJECTS passed as arguments " | ||||
+ "(default: follow)", | + "(default: follow)", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
"--filename/--no-filename", | "--filename/--no-filename", | ||||
"show_filename", | "show_filename", | ||||
default=True, | default=True, | ||||
help="show/hide file name (default: show)", | help="show/hide file name (default: show)", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
"--recursive", "-r", is_flag=True, help="Compute SWHID recursively", | |||||
zackAuthorUnsubmitted Done Inline Actionsas SWHID are always computed recursively, just not shown by default, the help message should be something like "Show computed SWHID recursively" zack: as SWHID are always computed recursively, just not shown by default, the help message should be… | |||||
) | |||||
@click.option( | |||||
"--type", | "--type", | ||||
"-t", | "-t", | ||||
"obj_type", | "obj_type", | ||||
default="auto", | default="auto", | ||||
type=click.Choice(["auto", "content", "directory", "origin", "snapshot"]), | type=click.Choice(["auto", "content", "directory", "origin", "snapshot"]), | ||||
help="type of object to identify (default: auto)", | help="type of object to identify (default: auto)", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
Show All 9 Lines | @click.option( | ||||
"--verify", | "--verify", | ||||
"-v", | "-v", | ||||
metavar="SWHID", | metavar="SWHID", | ||||
type=CoreSWHIDParamType(), | type=CoreSWHIDParamType(), | ||||
help="reference identifier to be compared with computed one", | help="reference identifier to be compared with computed one", | ||||
) | ) | ||||
@click.argument("objects", nargs=-1, required=True) | @click.argument("objects", nargs=-1, required=True) | ||||
def identify( | def identify( | ||||
obj_type, verify, show_filename, follow_symlinks, objects, exclude_patterns, | obj_type, | ||||
verify, | |||||
show_filename, | |||||
recursive, | |||||
follow_symlinks, | |||||
objects, | |||||
exclude_patterns, | |||||
): | ): | ||||
"""Compute the Software Heritage persistent identifier (SWHID) for the given | """Compute the Software Heritage persistent identifier (SWHID) for the given | ||||
source code object(s). | source code object(s). | ||||
For more details about SWHIDs see: | For more details about SWHIDs see: | ||||
\b | \b | ||||
https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html | https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html | ||||
Tip: you can pass "-" to identify the content of standard input. | Tip: you can pass "-" to identify the content of standard input. | ||||
\b | \b | ||||
Examples: | Examples: | ||||
\b | \b | ||||
$ swh identify fork.c kmod.c sched/deadline.c | $ swh identify fork.c kmod.c sched/deadline.c | ||||
swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c | swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c | ||||
swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c | swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c | ||||
swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c | swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c | ||||
\b | \b | ||||
$ swh identify --no-filename /usr/src/linux/kernel/ | $ swh identify --R --no-filename /usr/src/linux/kernel/ | ||||
swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab | swh:1:dir: | ||||
KShivenduUnsubmitted Not Done Inline ActionsI will fix this and add a test soon. Do you see any other mistake in the diff? KShivendu: I will fix this and add a test soon. Do you see any other mistake in the diff? | |||||
\b | \b | ||||
$ git clone --mirror https://forge.softwareheritage.org/source/helloworld.git | $ git clone --mirror https://forge.softwareheritage.org/source/helloworld.git | ||||
$ swh identify --type snapshot helloworld.git/ | $ swh identify --type snapshot helloworld.git/ | ||||
swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git | swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git | ||||
""" # NoQA # overlong lines in shell examples are fine | """ # NoQA # overlong lines in shell examples are fine | ||||
from functools import partial | from functools import partial | ||||
if verify and len(objects) != 1: | if verify and len(objects) != 1: | ||||
raise click.BadParameter("verification requires a single object") | raise click.BadParameter("verification requires a single object") | ||||
results = zip( | partial_identify_obj = partial( | ||||
objects, | identify_object, obj_type, follow_symlinks, exclude_patterns | ||||
map( | |||||
partial(identify_object, obj_type, follow_symlinks, exclude_patterns), | |||||
objects, | |||||
), | |||||
) | ) | ||||
if recursive: | |||||
def out(path: Path): | |||||
is_dir = path.is_dir() | |||||
colored_path = colorize(path.name, Color.blue if is_dir else Color.green) | |||||
swhid = partial_identify_obj(str(path)) | |||||
return f"{colored_path} {swhid}" | |||||
for obj in list(objects): | |||||
obj = Path(obj) | |||||
for root, _, files in os.walk(str(obj)): | |||||
zackAuthorUnsubmitted Not Done Inline ActionsThis is not the right way to implement this. Because you're recomputing the SWHIDs at all level, making the complexity of this thing quadratic (O(n^2)). By reading this I realized that this issue is probably not an easy hack as I initially thought, but does require in fact some refactoring. Instead of calling the high-level functions to compute the SWHID of a single file or dir like we are doing right now, we should build a single model object for the top-level dir, and either output its SWHID, or traverse it (without recomputing SWHIDs) to output all of it. As observed in T1136 it is a feature that already exists in swh-scanner too, that should be refactored. zack: This is not the right way to implement this. Because you're recomputing the SWHIDs at all level… | |||||
KShivenduUnsubmitted Not Done Inline ActionsWhere do I start? Can you please give me some pointers for the same? KShivendu: Where do I start? Can you please give me some pointers for the same? | |||||
root = Path(root) | |||||
rel_depth = len(str(root.relative_to(obj)).split(os.sep)) | |||||
click.echo(f"{(rel_depth - 1) * ' │ '} {out(root)}") | |||||
for filename in files: | |||||
filepath = Path(os.path.join(root, filename)) | |||||
click.echo(f"{(rel_depth) * ' │ '} {out(filepath)}") | |||||
return | |||||
ardumontUnsubmitted Not Done Inline ActionsIt's missing tests. ardumont: It's missing tests.
Please, add some. | |||||
results = zip(objects, map(partial_identify_obj, objects)) | |||||
if verify: | if verify: | ||||
swhid = next(results)[1] | swhid = next(results)[1] | ||||
if str(verify) == swhid: | if str(verify) == swhid: | ||||
click.echo("SWHID match: %s" % swhid) | click.echo("SWHID match: %s" % swhid) | ||||
sys.exit(0) | sys.exit(0) | ||||
else: | else: | ||||
click.echo("SWHID mismatch: %s != %s" % (verify, swhid)) | click.echo("SWHID mismatch: %s != %s" % (verify, swhid)) | ||||
sys.exit(1) | sys.exit(1) | ||||
Show All 10 Lines |
is this interleaving of from/import/from/import for modules all coming from the stdlib ok w.r.t. coding guidelines and isort? (i'm not 100% sure of the answer, so maybe just double-check it)