diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..2ea652b --- /dev/null +++ b/conftest.py @@ -0,0 +1 @@ +pytest_plugins = ["swh.auth.pytest_plugin"] diff --git a/docs/index.rst b/docs/index.rst index 7eccf39..64cacb2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,96 +1,94 @@ .. _swh-web-client: .. include:: README.rst .. _swh-web-client-auth: Authentication -------------- If you have a user account registered on `Software Heritage Identity Provider`_, it is possible to authenticate requests made to the Web APIs through the use of an OpenID Connect bearer token. Sending authenticated requests can notably allow to lift API rate limiting depending on your permissions. To get this token, a dedicated CLI tool is made available when installing ``swh-web-client``: .. code-block:: text - $ swh web auth - Usage: swh web auth [OPTIONS] COMMAND [ARGS]... + $ swh auth + Usage: swh auth [OPTIONS] COMMAND [ARGS]... - Authenticate Software Heritage users with OpenID Connect. + Software Heritage Authentication tools. - This CLI tool eases the retrieval of bearer tokens to authenticate a user - querying the Software Heritage Web API. + This CLI eases the retrieval of a bearer token to authenticate a user + querying Software Heritage Web APIs. Options: --oidc-server-url TEXT URL of OpenID Connect server (default to "https://auth.softwareheritage.org/auth/") --realm-name TEXT Name of the OpenID Connect authentication realm (default to "SoftwareHeritage") --client-id TEXT OpenID Connect client identifier in the realm (default to "swh-web") -h, --help Show this message and exit. Commands: - generate-token Generate a new bearer token for Web API authentication. - login Alias for 'generate-token' - logout Alias for 'revoke-token' - revoke-token Revoke a bearer token used for Web API authentication. + generate-token Generate a new bearer token for a Web API authentication. + revoke-token Revoke a bearer token used for a Web API authentication. 
In order to get your tokens, you need to use the ``generate-token`` subcommand of the CLI tool by passing your username as argument. You will be prompted for your password and if the authentication succeeds a new OpenID Connect offline session will be created and the token will be dumped to standard output. .. code-block:: text - $ swh web auth generate-token + $ swh auth --client-id swh-web generate-token <username> Password: eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJmNjMzMD... To authenticate yourself, you need to send that token value in request headers when querying the Web API. Considering you have stored that token value in a TOKEN environment variable, you can perform an authenticated call the following way using ``curl``: .. code-block:: text $ curl -H "Authorization: Bearer ${TOKEN}" https://archive.softwareheritage.org/api/1/ Note that if you intend to use the :class:`swh.web.client.client.WebAPIClient` class, you can activate authentication by using the following code snippet:: from swh.web.client.client import WebAPIClient TOKEN = '.......' # Use "swh auth generate-token" command to get it client = WebAPIClient(bearer_token=TOKEN) # All requests to the Web API will be authenticated resp = client.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') It is also possible to revoke a token, preventing future Web API authentication when using it. The ``revoke-token`` subcommand of the CLI tool has to be used to perform that task. .. code-block:: text - $ swh web auth revoke-token $REFRESH_TOKEN + $ swh auth --client-id swh-web revoke-token $REFRESH_TOKEN Token successfully revoked. API Reference ------------- .. toctree:: :maxdepth: 2 /apidoc/swh.web.client ..
_Software Heritage Identity Provider: https://auth.softwareheritage.org/auth/realms/SoftwareHeritage/account/ diff --git a/requirements-swh.txt b/requirements-swh.txt index 7d8c1a3..4ee1eb0 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,3 +1,4 @@ # Add here internal Software Heritage dependencies, one per line. +swh.auth >= 0.6 swh.core >= 0.3 swh.model diff --git a/setup.py b/setup.py index 5c08132..1c5a1d7 100755 --- a/setup.py +++ b/setup.py @@ -1,74 +1,74 @@ #!/usr/bin/env python3 # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import open from os import path from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.rst"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements # Edit this part to match your module. 
# Full sample: # https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py setup( name="swh.web.client", # example: swh.loader.pypi description="Software Heritage Web client", long_description=long_description, long_description_content_type="text/x-rst", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/source/swh-web-client/", packages=find_packages(), # packages's modules install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), setup_requires=["setuptools-scm"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-web-client", "Documentation": "https://docs.softwareheritage.org/devel/swh-web-client/", }, entry_points=""" [swh.cli.subcommands] - auth=swh.web.client.cli + web=swh.web.client.cli """, ) diff --git a/swh/web/client/auth.py b/swh/web/client/auth.py deleted file mode 100644 index a70cd40..0000000 --- a/swh/web/client/auth.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (C) 2020 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from typing import Any, Dict -from urllib.parse import urljoin - -import requests - -SWH_OIDC_SERVER_URL = "https://auth.softwareheritage.org/auth/" -SWH_REALM_NAME = "SoftwareHeritage" -SWH_WEB_CLIENT_ID = "swh-web" - - -class 
AuthenticationError(Exception): - """Authentication related error. - - Example: A bearer token has been revoked. - - """ - - pass - - -class OpenIDConnectSession: - """ - Simple class wrapping requests sent to an OpenID Connect server. - - Args: - oidc_server_url: URL of OpenID Connect server - realm_name: name of the OpenID Connect authentication realm - client_id: OpenID Connect client identifier in the realm - """ - - def __init__( - self, - oidc_server_url: str = SWH_OIDC_SERVER_URL, - realm_name: str = SWH_REALM_NAME, - client_id: str = SWH_WEB_CLIENT_ID, - ): - realm_url = urljoin(oidc_server_url, f"realms/{realm_name}/") - self.client_id = client_id - self.token_url = urljoin(realm_url, "protocol/openid-connect/token/") - self.logout_url = urljoin(realm_url, "protocol/openid-connect/logout/") - - def login(self, username: str, password: str) -> Dict[str, Any]: - """ - Login and create new offline OpenID Connect session. - - Args: - username: an existing username in the realm - password: password associated to username - - Returns: - The OpenID Connect session info - """ - return requests.post( - url=self.token_url, - data={ - "grant_type": "password", - "client_id": self.client_id, - "scope": "openid offline_access", - "username": username, - "password": password, - }, - ).json() - - def logout(self, token: str): - """ - Logout from an offline OpenID Connect session and invalidate - previously emitted tokens. 
- - Args: - token: a bearer token retrieved after login - """ - requests.post( - url=self.logout_url, - data={ - "client_id": self.client_id, - "scope": "openid", - "refresh_token": token, - }, - ) diff --git a/swh/web/client/cli.py b/swh/web/client/cli.py index 29c9186..1974154 100644 --- a/swh/web/client/cli.py +++ b/swh/web/client/cli.py @@ -1,282 +1,233 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict, List # WARNING: do not import unnecessary things here to keep cli startup time under # control import click from click.core import Context +from swh.auth.cli import auth as auth_cli +from swh.auth.cli import generate_token as auth_generate_token +from swh.auth.cli import revoke_token as auth_revoke_token from swh.core.cli import swh as swh_cli_group CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) # TODO (T1410): All generic config code should reside in swh.core.config DEFAULT_CONFIG_PATH = os.environ.get( "SWH_CONFIG_FILE", os.path.join(click.get_app_dir("swh"), "global.yml") ) DEFAULT_CONFIG: Dict[str, Any] = { "api_url": "https://archive.softwareheritage.org/api/1", "bearer_token": None, } @swh_cli_group.group(name="web", context_settings=CONTEXT_SETTINGS) @click.option( "-C", "--config-file", default=None, type=click.Path(exists=True, dir_okay=False, path_type=str), help=f"Configuration file (default: {DEFAULT_CONFIG_PATH})", ) @click.pass_context def web(ctx: Context, config_file: str): """Software Heritage web client""" import logging from swh.core import config from swh.web.client.client import WebAPIClient if not config_file: config_file = DEFAULT_CONFIG_PATH try: conf = config.read_raw_config(config.config_basepath(config_file)) if not conf: raise ValueError(f"Cannot parse configuration file: 
{config_file}") # TODO: Determine what the following conditional is for if config_file == DEFAULT_CONFIG_PATH: try: conf = conf["swh"]["web"]["client"] except KeyError: pass # recursive merge not done by config.read conf = config.merge_configs(DEFAULT_CONFIG, conf) except Exception: logging.warning( "Using default configuration (cannot load custom one)", exc_info=True ) conf = DEFAULT_CONFIG ctx.ensure_object(dict) ctx.obj["client"] = WebAPIClient(conf["api_url"], conf["bearer_token"]) @web.command(name="search") @click.argument( "query", required=True, nargs=-1, metavar="KEYWORD...", ) @click.option( "--limit", "limit", type=int, default=10, show_default=True, help="maximum number of results to show", ) @click.option( "--only-visited", is_flag=True, show_default=True, help="if true, only return origins with at least one visit by Software heritage", ) @click.option( "--url-encode/--no-url-encode", default=False, show_default=True, help="if true, escape origin URLs in results with percent encoding (RFC 3986)", ) @click.pass_context def search( ctx: Context, query: List[str], limit: int, only_visited: bool, url_encode: bool, ): """Search a query (as a list of keywords) into the Software Heritage archive. The search results are printed to CSV format, one result per line, using a tabulation as the field delimiter. 
""" import logging import sys import urllib.parse import requests client = ctx.obj["client"] keywords = " ".join(query) try: results = client.origin_search(keywords, limit, only_visited) for result in results: if url_encode: result["url"] = urllib.parse.quote_plus(result["url"]) print("\t".join(result.values())) except requests.HTTPError as err: logging.error("Could not retrieve search results: %s", err) except (BrokenPipeError, IOError): # Get rid of the BrokenPipeError message sys.stderr.close() @web.group(name="save", context_settings=CONTEXT_SETTINGS) @click.pass_context def savecodenow(ctx: Context,): """Subcommand to interact from the cli with the save code now feature """ pass @savecodenow.command("submit-request") @click.option("--delimiter", "-d", default=",") @click.pass_context def submit_request(ctx, delimiter: str) -> None: """Submit new save code now request through cli pipe. The expected format of the request is one csv row ``<visit_type>,<origin>``. Example: cat list-origins | swh web save submit-request echo svn;https://svn-url\ngit;https://git-url | swh web save \ submit-request --delimiter ';' Prints: The output of save code now requests as json output. 
""" import json import logging import sys logging.basicConfig(level=logging.INFO, stream=sys.stderr) client = ctx.obj["client"] processed_origins = [] for origin in sys.stdin: visit_type, origin = origin.rstrip().split(delimiter) try: saved_origin = client.origin_save(visit_type, origin) logging.info("Submitted origin (%s, %s)", visit_type, origin) processed_origins.append(saved_origin) except Exception as e: logging.warning( "Issue for origin (%s, %s)\n%s", origin, visit_type, e, ) logging.debug("Origin saved: %s", len(processed_origins)) print(json.dumps(processed_origins)) -@web.group(name="auth", context_settings=CONTEXT_SETTINGS) -@click.option( - "--oidc-server-url", - "oidc_server_url", - default="https://auth.softwareheritage.org/auth/", - help=( - "URL of OpenID Connect server (default to " - '"https://auth.softwareheritage.org/auth/")' - ), -) -@click.option( - "--realm-name", - "realm_name", - default="SoftwareHeritage", - help=( - "Name of the OpenID Connect authentication realm " - '(default to "SoftwareHeritage")' - ), -) -@click.option( - "--client-id", - "client_id", - default="swh-web", - help=("OpenID Connect client identifier in the realm " '(default to "swh-web")'), -) +def _forward_context(ctx: Context, *args, **kwargs): + ctx.forward(*args, **kwargs) + + +@web.group(name="auth", context_settings=CONTEXT_SETTINGS, deprecated=True) @click.pass_context -def auth(ctx: Context, oidc_server_url: str, realm_name: str, client_id: str): +def auth(ctx: Context): """ Authenticate Software Heritage users with OpenID Connect. This CLI tool eases the retrieval of a bearer token to authenticate a user querying the Software Heritage Web API. - """ - from swh.web.client.auth import OpenIDConnectSession - ctx.ensure_object(dict) - ctx.obj["oidc_session"] = OpenIDConnectSession( - oidc_server_url, realm_name, client_id - ) + That command group is deprecated, use ``swh auth`` instead. 
+ """ + _forward_context(ctx, auth_cli, client_id="swh-web") -@auth.command("generate-token") +@auth.command("generate-token", deprecated=True) @click.argument("username") @click.pass_context def generate_token(ctx: Context, username: str): """ Generate a new bearer token for Web API authentication. Login with USERNAME, create a new OpenID Connect session and get bearer token. User will be prompted for his password and token will be printed to standard output. The created OpenID Connect session is an offline one so the provided token has a much longer expiration time than classical OIDC sessions (usually several dozens of days). """ - from getpass import getpass - - password = getpass() - - oidc_info = ctx.obj["oidc_session"].login(username, password) - if "refresh_token" in oidc_info: - print(oidc_info["refresh_token"]) - else: - print(oidc_info) + _forward_context(ctx, auth_generate_token, username=username) -@auth.command("login", deprecated=True) -@click.argument("username") -@click.pass_context -def login(ctx: Context, username: str): - """ - Alias for 'generate-token' - """ - ctx.forward(generate_token) - - -@auth.command("revoke-token") +@auth.command("revoke-token", deprecated=True) @click.argument("token") @click.pass_context def revoke_token(ctx: Context, token: str): """ Revoke a bearer token used for Web API authentication. Use TOKEN to logout from an offline OpenID Connect session. The token is definitely revoked after that operation. 
""" - ctx.obj["oidc_session"].logout(token) - print("Token successfully revoked.") - - -@auth.command("logout", deprecated=True) -@click.argument("token") -@click.pass_context -def logout(ctx: Context, token: str): - """ - Alias for 'revoke-token' - """ - ctx.forward(revoke_token) + _forward_context(ctx, auth_revoke_token, token=token) diff --git a/swh/web/client/tests/test_cli.py b/swh/web/client/tests/test_cli.py index a7bee0f..6d48877 100644 --- a/swh/web/client/tests/test_cli.py +++ b/swh/web/client/tests/test_cli.py @@ -1,115 +1,99 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os from click.testing import CliRunner -from swh.web.client.cli import auth, web +from swh.web.client.cli import auth_cli, auth_generate_token, auth_revoke_token, web runner = CliRunner() -oidc_profile = { - "access_token": "some-access-token", - "expires_in": 600, - "refresh_expires_in": 0, - "refresh_token": "some-refresh-token", - "token_type": "bearer", - "session_state": "some-state", - "scope": "openid email profile offline_access", -} - def test_auth_generate_token(mocker): - mock_getpass = mocker.patch("getpass.getpass") - mock_getpass.return_value = "password" - mock_oidc_session = mocker.patch("swh.web.client.auth.OpenIDConnectSession") - mock_login = mock_oidc_session.return_value.login - mock_login.return_value = oidc_profile - - for command in ("generate-token", "login"): - mock_login.side_effect = None - result = runner.invoke(auth, [command, "username"], input="password\n") - assert result.exit_code == 0 - assert oidc_profile["refresh_token"] in result.output - - mock_login.side_effect = Exception("Auth error") - - result = runner.invoke(auth, [command, "username"], input="password\n") - assert result.exit_code == 1 + forward_context = 
mocker.patch("swh.web.client.cli._forward_context") + runner.invoke(web, ["auth", "generate-token", "username"]) + assert forward_context.call_count == 2 + ctx = forward_context.call_args_list[0][0][0] + ctx2 = forward_context.call_args_list[1][0][0] + forward_context.assert_has_calls( + [ + mocker.call(ctx, auth_cli, client_id="swh-web"), + mocker.call(ctx2, auth_generate_token, username="username"), + ] + ) def test_auth_revoke_token(mocker): - - mock_oidc_session = mocker.patch("swh.web.client.auth.OpenIDConnectSession") - mock_logout = mock_oidc_session.return_value.logout - - for command in ("revoke-token", "logout"): - mock_logout.side_effect = None - result = runner.invoke(auth, [command, oidc_profile["refresh_token"]]) - assert result.exit_code == 0 - - mock_logout.side_effect = Exception("Auth error") - result = runner.invoke(auth, [command, oidc_profile["refresh_token"]]) - assert result.exit_code == 1 + forward_context = mocker.patch("swh.web.client.cli._forward_context") + runner.invoke(web, ["auth", "revoke-token", "token"]) + assert forward_context.call_count == 2 + ctx = forward_context.call_args_list[0][0][0] + ctx2 = forward_context.call_args_list[1][0][0] + forward_context.assert_has_calls( + [ + mocker.call(ctx, auth_cli, client_id="swh-web"), + mocker.call(ctx2, auth_revoke_token, token="token"), + ] + ) def test_save_code_now_through_cli(mocker, web_api_mock, tmp_path, cli_config_path): """Trigger save code now from the cli creates new save code now requests""" origins = [ ("git", "https://gitlab.org/gazelle/itest"), ("git", "https://git.renater.fr/anonscm/git/6po/6po.git"), ("git", "https://github.com/colobot/colobot"), # this will be rejected ("tig", "invalid-and-refusing-to-save-this"), ] origins_csv = "\n".join(map(lambda t: ",".join(t), origins)) origins_csv = f"{origins_csv}\n" temp_file = os.path.join(tmp_path, "tmp.csv") with open(temp_file, "w") as f: f.write(origins_csv) with open(temp_file, "r") as f: result = runner.invoke( web, 
["--config-file", cli_config_path, "save", "submit-request"], input=f, catch_exceptions=False, ) assert result.exit_code == 0, f"Unexpected output: {result.output}" actual_save_requests = json.loads(result.output.strip()) assert len(actual_save_requests) == 3 expected_save_requests = [ { "origin_url": "https://gitlab.org/gazelle/itest", "save_request_date": "2021-04-20T11:34:38.752929+00:00", "save_request_status": "accepted", "save_task_status": "not yet scheduled", "visit_date": None, "visit_type": "git", }, { "origin_url": "https://git.renater.fr/anonscm/git/6po/6po.git", "save_request_date": "2021-04-20T11:34:40.115226+00:00", "save_request_status": "accepted", "save_task_status": "not yet scheduled", "visit_date": None, "visit_type": "git", }, { "origin_url": "https://github.com/colobot/colobot", "save_request_date": "2021-04-20T11:40:47.667492+00:00", "save_request_status": "accepted", "save_task_status": "not yet scheduled", "visit_date": None, "visit_type": "git", }, ] for actual_save_request in actual_save_requests: assert actual_save_request in expected_save_requests