diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py --- a/swh/scanner/cli.py +++ b/swh/scanner/cli.py @@ -107,7 +107,7 @@ @scanner.command(name="scan") -@click.argument("root_path", required=True, type=click.Path(exists=True)) +@click.argument("root_path", default=".", type=click.Path(exists=True)) @click.option( "-u", "--api-url", @@ -129,9 +129,11 @@ "-f", "--output-format", "out_fmt", - default="text", + default="summary", show_default=True, - type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False), + type=click.Choice( + ["summary", "text", "json", "ndjson", "sunburst"], case_sensitive=False + ), help="The output format", ) @click.option( @@ -158,6 +160,19 @@ """Scan a source code project to discover files and directories already present in the archive. + The command can provide different output using the --output-format option:\n + \b + summary: display a general summary of what the scanner found + + text: display the scan result as a text based tree-like view of all the + files, using color to indicate the file status. + + json: write all collected data on standard output as JSON + + ndjson: write all collected data on standard output as Newline Delimited JSON + + sunburst: produce a dynamic chart as .html file. 
def summary(self):
    """Print a short summary of the scan result on standard output.

    The source tree is walked once and its nodes are split into contents
    (files) and directories. The summary then reports:

    - the number of files, and how many are flagged ``known`` in
      ``self.nodes_data``;
    - the number of directories, how many are flagged ``known``
      ("fully-known"), and how many are not known themselves but are the
      direct parent of at least one known file ("partially-known").

    Percentages use integer division and are reported as 0 when the
    corresponding total is 0, so an empty tree does not raise
    ``ZeroDivisionError``.

    Raises:
        AssertionError: if a node is neither a content nor a directory.
    """

    def percent(part, whole):
        # Guard against empty trees: there is nothing to divide by.
        return part * 100 // whole if whole else 0

    contents = []
    directories = []
    for node in self.source_tree.iter_tree():
        if node.object_type == "content":
            contents.append(node)
        elif node.object_type == "directory":
            directories.append(node)
        else:
            # Explicit raise instead of ``assert False`` so the check
            # survives ``python -O``.
            raise AssertionError(
                f"unreachable: unexpected object type {node.object_type!r}"
            )

    # Count known files and remember the directory that directly holds
    # each of them; this is what makes a directory "partially known".
    total_files = len(contents)
    known_files = 0
    directories_with_known_files = set()
    for c in contents:
        if self.nodes_data[c.swhid()]["known"]:
            known_files += 1
            path = c.data[self.get_path_name(c)]
            directories_with_known_files.add(os.path.dirname(path))

    total_directories = len(directories)
    full_known_directories = 0
    partially_known_directories = 0
    for d in directories:
        if self.nodes_data[d.swhid()]["known"]:
            full_known_directories += 1
        elif d.data[self.get_path_name(d)] in directories_with_known_files:
            partially_known_directories += 1

    kp = percent(known_files, total_files)
    fkp = percent(full_known_directories, total_directories)
    pkp = percent(partially_known_directories, total_directories)

    # These must be f-strings: without the prefix the placeholders are
    # printed literally instead of the computed values.
    print(f"Files: {total_files:10d}")
    print(f" known: {known_files:10d} ({kp:3d}%)")
    print(f"directories: {total_directories:10d}")
    print(f" fully-known: {full_known_directories:10d} ({fkp:3d}%)")
    print(f" partially-known: {partially_known_directories:10d} ({pkp:3d}%)")
    print("(see other --output-format for more details)")
data_as_json(self): json = {} for node in self.source_tree.iter_tree():