@click.group(name='scanner', context_settings=CONTEXT_SETTINGS)
@click.pass_context
def scanner(ctx):
    '''Software Heritage Scanner tools.'''
    pass


def parse_url(url):
    """Normalise the API base URL given on the command line.

    A scheme is added only when the URL has none, so an explicit
    ``http://`` endpoint is left untouched instead of being turned into
    ``https://http://...``. A trailing slash is appended when missing.

    Args:
        url: the API URL, with or without scheme and trailing slash

    Returns:
        the URL starting with ``http://`` or ``https://`` and ending
        with ``/``

    """
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    if not url.endswith('/'):
        url += '/'
    return url


@scanner.command(name='scan')
@click.argument('path', required=True, type=click.Path(exists=True))
@click.option('-u', '--api-url',
              default='https://archive.softwareheritage.org/api/1',
              metavar='API_URL', show_default=True,
              help="url for the api request")
@click.option('-f', '--format',
              type=click.Choice(['text', 'json'], case_sensitive=False),
              default='text',
              help="select the output format")
@click.pass_context
def scan(ctx, path, api_url, format):
    """Scan a source code project to discover files and directories already
    present in the archive"""
    # NOTE: the docstring above is the help text click shows to the user.
    # ``format`` shadows the builtin, but the name is derived by click from
    # the ``--format`` option, so it is kept for interface compatibility.

    # Make sure the URL carries a scheme and ends with '/' before it is
    # handed to the scanner.
    api_url = parse_url(api_url)

    # Build the in-memory tree rooted at the scanned path, let the scanner
    # fill it in, then render it in the requested output format.
    source_tree = Tree(PosixPath(path))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run(path, api_url, source_tree))

    source_tree.show(format)


if __name__ == '__main__':
    scan()
class Color(Enum):
    """ANSI escape sequences used to colour the text output."""
    blue = '\033[94m'
    green = '\033[92m'
    red = '\033[91m'
    end = '\033[0m'


def colorize(text: str, color: Color) -> str:
    """Return ``text`` wrapped in ``color`` and followed by the reset code."""
    return color.value + text + Color.end.value


class Tree:
    """Representation of a file system structure.

    Every node stores its path, its parent node (``father``), its object
    type (``DIRECTORY`` when the path is a directory, ``CONTENT``
    otherwise), a persistent identifier (``pid``, empty string until one
    is set) and its children keyed by their path.
    """

    def __init__(self, path: PosixPath, father: "Tree" = None):
        self.father = father
        self.path = path
        self.otype = DIRECTORY if path.is_dir() else CONTENT
        self.pid = ''
        self.children: Dict[PosixPath, Tree] = {}

    def addNode(self, path: PosixPath, pid: str = None) -> None:
        """Recursively add a new node path.

        Intermediate nodes are created on the way down; when ``path`` is
        this node's own path, only the PID is recorded (if one is given).

        Args:
            path: path of the node to add, located under this node's path
            pid: persistent identifier to attach to the node, if any

        """
        relative_path = path.relative_to(self.path)
        if relative_path == PosixPath('.'):
            # We reached the target node itself.
            if pid is not None:
                self.pid = pid
            return

        # Descend one path component at a time.
        new_path = self.path.joinpath(relative_path.parts[0])
        if new_path not in self.children:
            self.children[new_path] = Tree(new_path, self)

        self.children[new_path].addNode(path, pid)

    def show(self, format: str = 'text') -> None:
        """Print all the tree.

        Args:
            format: ``'json'`` prints the result of :meth:`getJsonTree`
                serialised as JSON; ``'text'`` (the default) prints an
                indented listing, coloured when stdout is a terminal

        Raises:
            ValueError: if ``format`` is neither ``'text'`` nor ``'json'``

        """
        if format == 'json':
            # Local import: keeps this block self-contained.
            import json

            # json.dumps emits real JSON (double quotes), unlike printing
            # the dict directly which would show the Python repr.
            print(json.dumps(self.getJsonTree(), indent=4, sort_keys=True))
        elif format == 'text':
            isatty = sys.stdout.isatty()
            root = str(self.path)
            print(colorize(root, Color.blue) if isatty else root)
            self.printChildren(isatty)
        else:
            raise ValueError(
                "unsupported output format: %r (expected 'text' or 'json')"
                % (format,))

    def printChildren(self, isatty: bool, inc: int = 0) -> None:
        """Print every descendant, one indentation level per depth."""
        for node in self.children.values():
            self.printNode(node, isatty, inc)
            if node.children:
                node.printChildren(isatty, inc+1)

    def printNode(self, node: Any, isatty: bool, inc: int) -> None:
        """Print one child ``node`` relative to this node's path.

        Directories are followed by ``/``. When ``isatty`` is true, nodes
        without a PID are red, directories with a PID are blue and
        contents with a PID are green.
        """
        rel_path = str(node.path.relative_to(self.path))

        print('│ '*inc, end='')
        if node.otype == DIRECTORY:
            color = Color.blue if node.pid else Color.red
            print(colorize(rel_path, color) if isatty else rel_path, end='')
            print('/')
        elif node.otype == CONTENT:
            color = Color.green if node.pid else Color.red
            print(colorize(rel_path, color) if isatty else rel_path)

    def getJsonTree(self) -> Dict[str, Any]:
        """Walk through the tree to discover content or directory that have
        a persistent identifier. If a persistent identifier is found it saves
        the path with the relative PID.

        Children without a PID are explored recursively and kept only when
        something with a PID was found underneath them.

        Returns:
            child_tree: the tree with the content/directory found

        """
        child_tree: Dict[str, Any] = {}
        for child_node in self.children.values():
            rel_path = str(child_node.path.relative_to(self.path))
            if child_node.pid:
                child_tree[rel_path] = child_node.pid
            else:
                # Compute the subtree once and reuse it.
                next_tree = child_node.getJsonTree()
                if next_tree:
                    child_tree[rel_path] = next_tree

        return child_tree