diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -30,19 +30,25 @@
 
 @scanner.command(name='scan')
 @click.argument('path', required=True, type=click.Path(exists=True))
-@click.option('--api-url',
+@click.option('-u', '--api-url',
               default='https://archive.softwareheritage.org/api/1',
               metavar='API_URL', show_default=True,
               help="url for the api request")
+@click.option('-f', '--format',
+              type=click.Choice(['text', 'json'], case_sensitive=False),
+              default='text',
+              help="select the output format")
 @click.pass_context
-def scan(ctx, path, api_url):
+def scan(ctx, path, api_url, format):
     """Scan a source code project to discover files and directories already
     present in the archive"""
 
+    api_url = parse_url(api_url)
     source_tree = Tree(PosixPath(path))
     loop = asyncio.get_event_loop()
     loop.run_until_complete(run(path, api_url, source_tree))
-    source_tree.show()
+
+    source_tree.show(format)
 
 
 if __name__ == '__main__':
diff --git a/swh/scanner/model.py b/swh/scanner/model.py
--- a/swh/scanner/model.py
+++ b/swh/scanner/model.py
@@ -51,13 +51,17 @@
 
         self.children[new_path].addNode(path, pid)
 
-    def show(self) -> None:
+    def show(self, format) -> None:
         """Print all the tree"""
-        isatty = sys.stdout.isatty()
+        if format == 'json':
+            import json  # local import: the module's import block is not in this hunk
+            print(json.dumps(self.getJsonTree()))
+        elif format == 'text':
+            isatty = sys.stdout.isatty()
 
-        print(colorize(str(self.path), Color.blue) if isatty
-              else str(self.path))
-        self.printChildren(isatty)
+            print(colorize(str(self.path), Color.blue) if isatty
+                  else str(self.path))
+            self.printChildren(isatty)
 
     def printChildren(self, isatty: bool, inc: int = 0) -> None:
         for path, node in self.children.items():
@@ -82,3 +86,24 @@
                 print(colorize(rel_path, Color.green) if isatty else rel_path)
             else:
                 print(colorize(rel_path, Color.red) if isatty else rel_path)
+
+    def getJsonTree(self):
+        """Build a nested dict of the entries found below this node.
+        A child that has a persistent identifier is mapped to that PID; a
+        child without one is kept only when its own subtree is not empty.
+
+        Returns:
+            child_tree: dict mapping relative paths to a PID or a subtree
+
+        """
+        child_tree = {}
+        for child_node in self.children.values():
+            rel_path = str(child_node.path.relative_to(self.path))
+            if child_node.pid:
+                child_tree[rel_path] = child_node.pid
+            else:
+                next_tree = child_node.getJsonTree()
+                if next_tree:
+                    child_tree[rel_path] = next_tree
+
+        return child_tree