diff --git a/README.md b/README.md new file mode 100644 index 0000000..07dcc67 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +swh-scanner +=========== + +Source code scanner to analyze code bases and compare them with source code +artifacts archived by Software Heritage + +See the +[documentation](https://docs.softwareheritage.org/devel/swh-scanner/index.html) +for more details. \ No newline at end of file diff --git a/README.rst b/README.rst deleted file mode 120000 index cffceba..0000000 --- a/README.rst +++ /dev/null @@ -1 +0,0 @@ -docs/README.rst \ No newline at end of file diff --git a/docs/README.rst b/docs/README.rst deleted file mode 100644 index 4ecee0d..0000000 --- a/docs/README.rst +++ /dev/null @@ -1,26 +0,0 @@ -Software Heritage - Code Scanner -================================ - -Source code scanner using the -`Software Heritage <https://www.softwareheritage.org/>`_ -`archive <https://archive.softwareheritage.org>`_ -as knowledge base. - - -Sample usage ------------- - -.. code-block:: shell - - $ swh scanner scan --help - - Usage: swh scanner scan [OPTIONS] PATH - - Scan a source code project to discover files and directories already - present in the archive - - Options: - -u, --api-url API_URL url for the api request [default: - https://archive.softwareheritage.org/api/1] - -f, --format [text|json] select the output format - -h, --help Show this message and exit. diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 0000000..0e8d528 --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,6 @@ +Command-line interface +====================== + +.. click:: swh.scanner.cli:scan + :prog: swh scanner scan + :show-nested: diff --git a/docs/index.rst b/docs/index.rst index 90a3290..b7f9c62 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,16 +1,17 @@ .. _swh-scanner: -.. include:: README.rst +Software Heritage - Code Scanner +================================ + +Source code scanner using the +`Software Heritage <https://www.softwareheritage.org/>`_ +`archive <https://archive.softwareheritage.org>`_ +as knowledge base. .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 :caption: Contents: - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + cli + /apidoc/swh.scanner diff --git a/setup.py b/setup.py index f9835b3..84e7e72 100755 --- a/setup.py +++ b/setup.py @@ -1,74 +1,74 @@ #!/usr/bin/env python3 # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file -with open(path.join(here, "README.rst"), encoding="utf-8") as f: +with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements # Edit this part to match your module. 
# Full sample: # https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py setup( name="swh.scanner", description="Software Heritage code scanner", long_description=long_description, - long_description_content_type="text/x-rst", + long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DTSCN/", packages=find_packages(), # packages's modules install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), setup_requires=["setuptools-scm"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, entry_points=""" [swh.cli.subcommands] scanner=swh.scanner.cli:scanner """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-scanner", "Documentation": "https://docs.softwareheritage.org/devel/swh-scanner/", }, ) diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py index 7fde5a8..5ee2382 100644 --- a/swh/scanner/cli.py +++ b/swh/scanner/cli.py @@ -1,116 +1,118 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import click from pathlib import PosixPath from typing import Tuple from swh.core.cli import CONTEXT_SETTINGS @click.group(name="scanner", context_settings=CONTEXT_SETTINGS) 
@click.pass_context
def scanner(ctx):
    """Software Heritage Scanner tools."""
    # The docstring above doubles as the help text click prints for the group,
    # so it is kept short and user-facing.


def parse_url(url):
    """Normalize the API URL received on the command line.

    ``https://`` is prepended only when ``url`` names no scheme at all, and a
    trailing slash is appended when it is missing. A URL that already carries
    a scheme (e.g. ``http://localhost:5004/api/1``) keeps it instead of being
    turned into ``https://http://...``.

    Args:
        url: URL, or bare host and path, of the API endpoint

    Returns:
        the URL with an explicit scheme and a trailing slash
    """
    if "://" not in url:
        url = "https://" + url
    if not url.endswith("/"):
        url += "/"
    return url


def extract_regex_objs(root_path: PosixPath, patterns: Tuple[str, ...]) -> object:
    """Turn the exclusion glob patterns into compiled regular expressions.

    Each pattern is expanded with ``glob.glob``. Every path it matches must
    have ``root_path`` among its parents. A pattern that matches nothing is
    skipped without yielding anything.

    This is a generator: the checks below only run while it is being iterated.

    Args:
        root_path: root directory of the project being scanned
        patterns: glob patterns to exclude

    Yields:
        for each pattern that matched at least one path, the compiled regex
        obtained from ``fnmatch.translate`` on the normalized pattern

    Raises:
        InvalidDirectoryPath: if a matched path is not located under
            ``root_path``
    """
    import fnmatch
    import glob
    import re

    from .exceptions import InvalidDirectoryPath

    for pattern in patterns:
        # Expand the pattern once; the result is used both to validate the
        # matched paths and to decide whether the pattern yields a regex.
        matches = glob.glob(pattern)
        if not matches:
            continue

        for path in matches:
            dirpath = PosixPath(path)
            if root_path not in dirpath.parents:
                error_msg = (
                    f'The path "{dirpath}" is not a subdirectory or relative '
                    f'to the root directory path: "{root_path}"'
                )
                raise InvalidDirectoryPath(error_msg)

        yield re.compile(fnmatch.translate(str(PosixPath(pattern))))


@scanner.command(name="scan")
@click.argument("root_path", required=True, type=click.Path(exists=True))
@click.option(
    "-u",
    "--api-url",
    default="https://archive.softwareheritage.org/api/1",
    metavar="API_URL",
    show_default=True,
    help="URL for the api request",
)
@click.option(
    "--exclude",
    "-x",
    "patterns",
    metavar="PATTERN",
    multiple=True,
    # Implicit concatenation instead of a backslash continuation, which would
    # embed the source indentation in the help text.
    help="Exclude directories using glob patterns "
    "(e.g., '*.git' to exclude all .git directories)",
)
@click.option(
    "-f",
    "--format",
    default="text",
    show_default=True,
    type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False),
    help="The output format",
)
@click.option(
    "-i", "--interactive", is_flag=True, help="Show the result in a dashboard"
)
@click.pass_context
def scan(ctx, root_path, api_url, patterns, format, interactive):
    """Scan a source code project to discover files and directories already
    present in the archive"""
    # The docstring above is the command's user-facing help text.
    # Heavy modules are imported here, not at module level, so that merely
    # loading the CLI stays fast.
    import asyncio

    from .dashboard.dashboard import run_app
    from .model import Tree
    from .plot import generate_sunburst
    from .scanner import run

    root = PosixPath(root_path)

    sre_patterns = set()
    if patterns:
        sre_patterns = set(extract_regex_objs(root, patterns))

    api_url = parse_url(api_url)
    source_tree = Tree(root)

    # asyncio.run() creates, runs and closes a fresh event loop; it replaces
    # get_event_loop().run_until_complete(), which is deprecated when no loop
    # is running.
    asyncio.run(run(root_path, api_url, source_tree, sre_patterns))

    if interactive:
        directories = source_tree.getDirectoriesInfo(root)
        figure = generate_sunburst(directories, root)
        run_app(figure, source_tree)
    else:
        source_tree.show(format)


if __name__ == "__main__":
    scan()