diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,4 @@
 # Add here internal Software Heritage dependencies, one per line.
 swh.core >= 0.3
 swh.model >= 2.6.4
+swh.auth
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -6,6 +6,8 @@
 # WARNING: do not import unnecessary things here to keep cli startup time under
 # control
 import os
+from pathlib import Path
+import sys
 from typing import Any, Dict, Optional
 
 import click
@@ -25,9 +27,10 @@
 
 CONFIG_ENVVAR = "SWH_CONFIG_FILE"
 DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml")
+SWH_API_ROOT = "https://archive.softwareheritage.org/api/1/"
 DEFAULT_CONFIG: Dict[str, Any] = {
     "web-api": {
-        "url": "https://archive.softwareheritage.org/api/1/",
+        "url": SWH_API_ROOT,
         "auth-token": None,
     }
 }
@@ -60,6 +63,23 @@
     return config
 
 
+def check_auth(config):
+    """Check that some authentication is configured for the web API.
+
+    Issue a warning on stderr otherwise."""
+    web_api_conf = config["web-api"]
+    if web_api_conf["url"] == SWH_API_ROOT and not web_api_conf.get("auth-token"):
+        # Only warn for the production API
+        #
+        # XXX We should probably warn at the time of the creation of the HTTP
+        # Client, after checking if the token is actually valid.
+        msg = "Warning: you are not authenticated with the Software Heritage API\n"
+        msg += "login to get a higher rate-limit"
+        click.echo(click.style(msg, fg="red"), file=sys.stderr)
+        msg = "See `swh scanner login -h` for more information."
+        click.echo(click.style(msg, fg="yellow"), file=sys.stderr)
+
+
 @swh_cli_group.group(
     name="scanner",
     context_settings=CONTEXT_SETTINGS,
@@ -101,11 +121,91 @@
     if config_file is not None:
         conf = config.read_raw_config(config.config_basepath(config_file))
         conf = config.merge_configs(DEFAULT_CONFIG, conf)
+    else:
+        config_file = DEFAULT_CONFIG_PATH
 
     ctx.ensure_object(dict)
+    ctx.obj["config_path"] = Path(config_file)
     ctx.obj["config"] = conf
 
 
+@scanner.command(name="login")
+@click.option(
+    "-f",
+    "--force/--no-force",
+    default=False,
+    help="Proceed even if a token is already present in the config",
+)
+@click.pass_context
+def login(ctx, force):
+    """Perform the necessary step to log yourself in the API
+
+    You will need to first create an account before running this operation. To
+    create an account, visit: https://archive.softwareheritage.org/
+    """
+    context = ctx.obj
+
+    # Check we are actually talking to the Software Heritage itself.
+    web_api_config = context["config"]["web-api"]
+    current_url = web_api_config["url"]
+    config_path = context["config_path"]
+    if current_url != SWH_API_ROOT:
+        msg = "`swh scanner login` only works with the Software Heritage API\n"
+        click.echo(click.style(msg, fg="red"), file=sys.stderr)
+        filename = click.format_filename(bytes(config_path))
+        msg = f"Configured in '{filename}' as web-api.url={current_url}\n"
+        click.echo(click.style(msg, fg="red"), file=sys.stderr)
+        ctx.exit(1)
+
+    # Check for an existing value in the configuration
+    if web_api_config.get("auth-token") is not None:
+        click.echo(click.style("You appear to already be logged in.", fg="green"))
+        if not force:
+            click.echo("Hint: use `--force` to overwrite the current token")
+            ctx.exit()
+        click.echo(click.style("Continuing because of `--force`.", fg="yellow"))
+
+    # Obtain a valid token through the API
+    #
+    # Coming from the swh auth generate-token code
+    # (this command might eventually move there)
+    from getpass import getpass
+    from swh.auth.keycloak import KeycloakError, keycloak_error_message
+    from swh.auth.keycloak import KeycloakOpenIDConnect
+
+    msg = "Please enter your SWH Archive credentials"
+    click.echo(click.style(msg, fg="yellow"))
+    msg = "If you do not already have an account, create one at:"
+    click.echo(click.style(msg, fg="yellow"))
+    msg = " https://archive.softwareheritage.org/"
+    click.echo(click.style(msg, fg="yellow"))
+    username = click.prompt("username")
+    password = getpass()
+    try:
+        url = "https://auth.softwareheritage.org/auth/"
+        realm = "SoftwareHeritage"
+        client = "swh-web"
+        oidc_client = KeycloakOpenIDConnect(url, realm, client)
+        scope = "openid offline_access"
+        oidc_info = oidc_client.login(username, password, scope)
+        token = oidc_info["refresh_token"]
+        msg = "token retrieved successfully"
+        click.echo(click.style(msg, fg="green"))
+    except KeycloakError as ke:
+        click.echo(click.style(keycloak_error_message(ke), fg="red"), file=sys.stderr)
+        ctx.exit(1)
+
+    # Write the new token into the file.
+    web_api_config["auth-token"] = token
+    # TODO use ruamel.yaml to preserve comments in config file
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+    config_path.write_text(yaml.safe_dump(context["config"]))
+    msg = "\nConfiguration file '%s' written successfully."
+    msg %= click.format_filename(bytes(config_path))
+    click.echo(click.style(msg, fg="green"))
+    click.echo("`swh scanner` will now be authenticated with the new token.")
+
+
 @scanner.command(name="scan")
 @click.argument("root_path", default=".", type=click.Path(exists=True))
 @click.option(
@@ -201,6 +301,7 @@
     import swh.scanner.scanner as scanner
 
     config = setup_config(ctx, api_url)
+    check_auth(config)
     extra_info = set(extra_info)
     scanner.scan(config, root_path, patterns, out_fmt, interactive, policy, extra_info)
 