diff --git a/README.md b/README.md
new file mode 100644
index 0000000..07dcc67
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+swh-scanner
+===========
+
+Source code scanner to analyze code bases and compare them with source code
+artifacts archived by Software Heritage.
+
+See the
+[documentation](https://docs.softwareheritage.org/devel/swh-scanner/index.html)
+for more details.
\ No newline at end of file
diff --git a/README.rst b/README.rst
deleted file mode 120000
index cffceba..0000000
--- a/README.rst
+++ /dev/null
@@ -1 +0,0 @@
-docs/README.rst
\ No newline at end of file
diff --git a/docs/README.rst b/docs/README.rst
deleted file mode 100644
index 4ecee0d..0000000
--- a/docs/README.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-Software Heritage - Code Scanner
-================================
-
-Source code scanner using the
-`Software Heritage <https://www.softwareheritage.org/>`_
-`archive <https://archive.softwareheritage.org/>`_
-as knowledge base.
-
-
-Sample usage
-------------
-
-.. code-block:: shell
-
- $ swh scanner scan --help
-
- Usage: swh scanner scan [OPTIONS] PATH
-
- Scan a source code project to discover files and directories already
- present in the archive
-
- Options:
- -u, --api-url API_URL url for the api request [default:
- https://archive.softwareheritage.org/api/1]
- -f, --format [text|json] select the output format
- -h, --help Show this message and exit.
diff --git a/docs/cli.rst b/docs/cli.rst
new file mode 100644
index 0000000..0e8d528
--- /dev/null
+++ b/docs/cli.rst
@@ -0,0 +1,6 @@
+Command-line interface
+======================
+
+.. click:: swh.scanner.cli:scan
+ :prog: swh scanner scan
+ :show-nested:
diff --git a/docs/index.rst b/docs/index.rst
index 90a3290..b7f9c62 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,16 +1,17 @@
.. _swh-scanner:
-.. include:: README.rst
+Software Heritage - Code Scanner
+================================
+
+Source code scanner using the
+`Software Heritage <https://www.softwareheritage.org/>`_
+`archive <https://archive.softwareheritage.org/>`_
+as knowledge base.
.. toctree::
- :maxdepth: 2
+ :maxdepth: 1
:caption: Contents:
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+ cli
+ /apidoc/swh.scanner
diff --git a/setup.py b/setup.py
index f9835b3..84e7e72 100755
--- a/setup.py
+++ b/setup.py
@@ -1,74 +1,74 @@
#!/usr/bin/env python3
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
# Absolute path of the directory containing this setup.py, so the README is
# located relative to this file rather than the current working directory.
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
-with open(path.join(here, "README.rst"), encoding="utf-8") as f:
+with open(path.join(here, "README.md"), encoding="utf-8") as f:
long_description = f.read()
def parse_requirements(name=None):
    """Read a requirements file and return the requirement lines it lists.

    The file is looked up by bare file name, i.e. relative to the current
    working directory.

    Args:
        name: optional suffix selecting ``requirements-<name>.txt``; when
            falsy, ``requirements.txt`` is read instead.

    Returns:
        The stripped, non-empty lines of the file that do not start with
        ``#``, in file order. An empty list when the file does not exist.
    """
    reqf = "requirements-%s.txt" % name if name else "requirements.txt"
    if not path.exists(reqf):
        return []

    # Decode explicitly as UTF-8 so the parsed requirements do not depend on
    # the locale of the machine running this script.
    with open(reqf, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [
            line for line in stripped_lines if line and not line.startswith("#")
        ]
# Edit this part to match your module.
# Full sample:
# https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py
setup(
name="swh.scanner",
description="Software Heritage code scanner",
long_description=long_description,
# Content type must match the format of the README read into
# long_description above.
- long_description_content_type="text/x-rst",
+ long_description_content_type="text/markdown",
python_requires=">=3.7",
author="Software Heritage developers",
author_email="swh-devel@inria.fr",
url="https://forge.softwareheritage.org/diffusion/DTSCN/",
packages=find_packages(), # packages's modules
# Runtime dependencies come from requirements.txt and requirements-swh.txt;
# test dependencies come from requirements-test.txt.
install_requires=parse_requirements() + parse_requirements("swh"),
tests_require=parse_requirements("test"),
# The package version is taken from SCM metadata through setuptools-scm.
setup_requires=["setuptools-scm"],
use_scm_version=True,
extras_require={"testing": parse_requirements("test")},
include_package_data=True,
# Declares the "scanner" click group of swh.scanner.cli under the
# swh.cli.subcommands entry-point group (presumably discovered by the
# `swh` command line tool; confirm in swh.core).
entry_points="""
[swh.cli.subcommands]
scanner=swh.scanner.cli:scanner
""",
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 3 - Alpha",
],
project_urls={
"Bug Reports": "https://forge.softwareheritage.org/maniphest",
"Funding": "https://www.softwareheritage.org/donate",
"Source": "https://forge.softwareheritage.org/source/swh-scanner",
"Documentation": "https://docs.softwareheritage.org/devel/swh-scanner/",
},
)
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
index 7fde5a8..5ee2382 100644
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -1,116 +1,118 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import click
from pathlib import PosixPath
from typing import Tuple
from swh.core.cli import CONTEXT_SETTINGS
@click.group(name="scanner", context_settings=CONTEXT_SETTINGS)
@click.pass_context
def scanner(ctx):
    """Software Heritage Scanner tools."""
    # Intentionally empty: this group only acts as the parent of the
    # subcommands registered on it with ``@scanner.command``. The docstring
    # above doubles as the group's help text, so it is kept verbatim.
def parse_url(url):
    """Normalize an API URL so that it carries a scheme and a trailing slash.

    Args:
        url: the API base URL, with or without a scheme.

    Returns:
        The URL, prefixed with ``https://`` when it starts with neither
        ``http://`` nor ``https://``, and with ``/`` appended when it does
        not already end with one.
    """
    # Keep an explicit scheme untouched (e.g. http:// for a local instance);
    # prepending unconditionally would yield "https://http://...".
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    if not url.endswith("/"):
        url += "/"
    return url
def extract_regex_objs(root_path: PosixPath, patterns: Tuple[str, ...]) -> object:
    """Generates a regex object for each glob pattern that matches at least
    one existing path, after checking that every matched path is located
    under the root path.

    Args:
        root_path: the root directory the matched paths must descend from
        patterns: glob patterns to expand and translate

    Yields:
        a compiled regular expression, obtained by translating the glob
        pattern with ``fnmatch.translate``

    Raises:
        InvalidDirectoryPath: if a path matched by a pattern does not have
            ``root_path`` among its parents
    """
    import glob
    import fnmatch
    import re

    for pattern in patterns:
        # Expand the glob only once per pattern: the same result is used both
        # to validate the matched paths and to decide whether to yield.
        matched_paths = glob.glob(pattern)

        for path in matched_paths:
            dirpath = PosixPath(path)
            if root_path not in dirpath.parents:
                # Project-local import deferred to the error path, the only
                # place where it is needed.
                from .exceptions import InvalidDirectoryPath

                error_msg = (
                    f'The path "{dirpath}" is not a subdirectory or relative '
                    f'to the root directory path: "{root_path}"'
                )
                raise InvalidDirectoryPath(error_msg)

        # Patterns that match nothing on disk produce no regex.
        if matched_paths:
            regex = fnmatch.translate(str(PosixPath(pattern)))
            yield re.compile(regex)
# "scan" subcommand of the scanner group. The option help strings and the
# function docstring below are user-facing CLI help text.
@scanner.command(name="scan")
@click.argument("root_path", required=True, type=click.Path(exists=True))
@click.option(
"-u",
"--api-url",
default="https://archive.softwareheritage.org/api/1",
metavar="API_URL",
show_default=True,
- help="url for the api request",
+ help="URL for the api request",
)
# NOTE(review): the new help string below is continued with a backslash inside
# the string literal, so any leading whitespace on the continuation line
# becomes part of the help text; adjacent string literals would avoid that.
@click.option(
"--exclude",
"-x",
"patterns",
metavar="PATTERN",
multiple=True,
- help="recursively exclude a specific pattern",
+ help="Exclude directories using glob patterns \
+ (e.g., '*.git' to exclude all .git directories)",
)
@click.option(
"-f",
"--format",
- type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False),
default="text",
- help="select the output format",
+ show_default=True,
+ type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False),
+ help="The output format",
)
@click.option(
- "-i", "--interactive", is_flag=True, help="show the result in a dashboard"
+ "-i", "--interactive", is_flag=True, help="Show the result in a dashboard"
)
@click.pass_context
def scan(ctx, root_path, api_url, patterns, format, interactive):
"""Scan a source code project to discover files and directories already
present in the archive"""
# Imports are local to the command to keep CLI startup time under control
# (see the warning at the top of this module).
import asyncio
from .scanner import run
from .model import Tree
from .plot import generate_sunburst
from .dashboard.dashboard import run_app
# Compile the --exclude glob patterns into regex objects; the set stays empty
# when no pattern was given.
sre_patterns = set()
if patterns:
sre_patterns = {
reg_obj for reg_obj in extract_regex_objs(PosixPath(root_path), patterns)
}
# Normalize the API URL (scheme prefix and trailing slash).
api_url = parse_url(api_url)
source_tree = Tree(PosixPath(root_path))
# Run the asynchronous scan to completion. source_tree is passed in and used
# afterwards, so it is presumably filled in by run(); confirm in .scanner.
loop = asyncio.get_event_loop()
loop.run_until_complete(run(root_path, api_url, source_tree, sre_patterns))
if interactive:
# Interactive mode: build a sunburst figure from the directories info and
# hand it, together with the tree, to the dashboard application.
root = PosixPath(root_path)
directories = source_tree.getDirectoriesInfo(root)
figure = generate_sunburst(directories, root)
run_app(figure, source_tree)
else:
# Non-interactive mode: let the tree display itself in the chosen format.
source_tree.show(format)
# Allow running this module directly as a script: invokes the "scan" click
# command, which reads its arguments from the command line.
if __name__ == "__main__":
scan()