diff --git a/PKG-INFO b/PKG-INFO index 3998adc..0f9e8fd 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,48 +1,48 @@ Metadata-Version: 2.1 Name: swh.web.client -Version: 0.2.0 +Version: 0.2.1 Summary: Software Heritage Web client Home-page: https://forge.softwareheritage.org/source/swh-web-client/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-web-client Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-web-client/ Description: Software Heritage - Web client ============================== Client for Software Heritage Web applications, via their APIs. Sample usage ------------ .. code-block:: python from swh.web.client.client import WebAPIClient cli = WebAPIClient() # retrieve any archived object via its SWHID cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # same, but for specific object types cli.revision('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # get() always retrieve entire objects, following pagination # WARNING: this might *not* be what you want for large objects cli.get('swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a') # type-specific methods support explicit iteration through pages next(cli.snapshot('swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764')) Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index c6cbb11..0000000 --- a/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ - (0.0.1-1) unstable; urgency=low - - * - - -- AUTHOR-NAME diff --git a/debian/compat b/debian/compat deleted file mode 100644 index ec63514..0000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/debian/control b/debian/control deleted file mode 100644 index 68a9ef5..0000000 --- a/debian/control +++ /dev/null @@ -1,19 +0,0 @@ -Source: # example: swh-loader-pypi -Maintainer: Software Heritage developers -Section: python -Priority: optional -Build-Depends: debhelper (>= 9), - dh-python (>= 2), - python3-all, - python3-nose, - python3-setuptools, - python3-swh.core, - python3-swh.storage, - python3-vcversioner -Standards-Version: 3.9.6 -Homepage: https://forge.softwareheritage.org/source// - -Package: python3- # example: python3-swh.loader.pypi -Architecture: all -Depends: ${misc:Depends}, ${python3:Depends} -Description: Software Heritage diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index f216ea5..0000000 --- a/debian/copyright +++ /dev/null @@ -1,22 +0,0 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ - -Files: * -Copyright: 2019 The Software Heritage developers -License: GPL-3+ - -License: GPL-3+ - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - . - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - . - You should have received a copy of the GNU General Public License - along with this program. If not, see . - . - On Debian systems, the complete text of the GNU General Public - License version 3 can be found in `/usr/share/common-licenses/GPL-3'. diff --git a/debian/rules b/debian/rules deleted file mode 100755 index 32f59e6..0000000 --- a/debian/rules +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/make -f - -export PYBUILD_NAME= # example: swh.loader.pypi -export PYBUILD_TEST_ARGS=--with-doctest -sva !db,!fs - -%: - dh $@ --with python3 --buildsystem=pybuild - -override_dh_install: - dh_install - rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py diff --git a/debian/source/format b/debian/source/format deleted file mode 100644 index 163aaf8..0000000 --- a/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (quilt) diff --git a/swh.web.client.egg-info/PKG-INFO b/swh.web.client.egg-info/PKG-INFO index 3998adc..0f9e8fd 100644 --- a/swh.web.client.egg-info/PKG-INFO +++ b/swh.web.client.egg-info/PKG-INFO @@ -1,48 +1,48 @@ Metadata-Version: 2.1 Name: swh.web.client -Version: 0.2.0 +Version: 0.2.1 Summary: Software Heritage Web client Home-page: https://forge.softwareheritage.org/source/swh-web-client/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-web-client Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-web-client/ Description: Software Heritage - Web client ============================== Client for Software Heritage Web applications, via their APIs. Sample usage ------------ .. code-block:: python from swh.web.client.client import WebAPIClient cli = WebAPIClient() # retrieve any archived object via its SWHID cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # same, but for specific object types cli.revision('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # get() always retrieve entire objects, following pagination # WARNING: this might *not* be what you want for large objects cli.get('swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a') # type-specific methods support explicit iteration through pages next(cli.snapshot('swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764')) Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing diff --git a/swh.web.client.egg-info/SOURCES.txt b/swh.web.client.egg-info/SOURCES.txt index 032f99c..c69ffff 100644 --- a/swh.web.client.egg-info/SOURCES.txt +++ b/swh.web.client.egg-info/SOURCES.txt @@ -1,51 +1,45 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.rst api_data.py mypy.ini pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini -debian/changelog -debian/compat -debian/control -debian/copyright -debian/rules -debian/source/format docs/.gitignore docs/Makefile docs/README.rst docs/conf.py docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh.web.client.egg-info/PKG-INFO swh.web.client.egg-info/SOURCES.txt swh.web.client.egg-info/dependency_links.txt swh.web.client.egg-info/entry_points.txt swh.web.client.egg-info/requires.txt swh.web.client.egg-info/top_level.txt swh/web/__init__.py swh/web/client/__init__.py swh/web/client/auth.py swh/web/client/cli.py swh/web/client/client.py swh/web/client/py.typed swh/web/client/tests/__init__.py swh/web/client/tests/api_data.py swh/web/client/tests/conftest.py swh/web/client/tests/gen-api-data.sh swh/web/client/tests/test_cli.py swh/web/client/tests/test_web_api_client.py \ No newline at end of file diff --git a/swh/web/client/cli.py b/swh/web/client/cli.py index 22666dd..4c53093 100644 --- a/swh/web/client/cli.py +++ b/swh/web/client/cli.py @@ -1,96 +1,97 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import click from click.core import Context + from swh.core.cli import swh as swh_cli_group CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @swh_cli_group.group(name="auth", context_settings=CONTEXT_SETTINGS) @click.option( "--oidc-server-url", "oidc_server_url", default="https://auth.softwareheritage.org/auth/", help=( "URL of OpenID Connect server (default to " '"https://auth.softwareheritage.org/auth/")' ), ) @click.option( "--realm-name", "realm_name", default="SoftwareHeritage", help=( "Name of the OpenID Connect authentication realm " '(default to "SoftwareHeritage")' ), ) @click.option( "--client-id", "client_id", default="swh-web", help=("OpenID Connect client identifier in the realm " '(default to "swh-web")'), ) @click.pass_context def auth(ctx: Context, oidc_server_url: str, realm_name: str, client_id: str): """ Authenticate Software Heritage users with OpenID Connect. This CLI tool eases the retrieval of a bearer token to authenticate a user querying the Software Heritage Web API. """ from swh.web.client.auth import OpenIDConnectSession ctx.ensure_object(dict) ctx.obj["oidc_session"] = OpenIDConnectSession( oidc_server_url, realm_name, client_id ) @auth.command("login") @click.argument("username") @click.pass_context def login(ctx: Context, username: str): """ Login and create new offline OpenID Connect session. Login with USERNAME, create a new OpenID Connect session and get bearer token. User will be prompted for his password and tokens will be printed to standard output. The created OpenID Connect session is an offline one so the provided token has a much longer expiration time than classical OIDC sessions (usually several dozens of days). """ from getpass import getpass password = getpass() oidc_info = ctx.obj["oidc_session"].login(username, password) if "refresh_token" in oidc_info: print(oidc_info["refresh_token"]) else: print(oidc_info) @auth.command("logout") @click.argument("token") @click.pass_context def logout(ctx: Context, token: str): """ Logout from an offline OpenID Connect session. Use TOKEN to logout from an offline OpenID Connect session. The token is definitely revoked after that operation. """ ctx.obj["oidc_session"].logout(token) print("Successfully logged out from OpenID Connect session") diff --git a/swh/web/client/client.py b/swh/web/client/client.py index 8d5250a..4a495f3 100644 --- a/swh/web/client/client.py +++ b/swh/web/client/client.py @@ -1,489 +1,509 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Python client for the Software Heritage Web API Light wrapper around requests for the archive API, taking care of data conversions and pagination. .. code-block:: python from swh.web.client.client import WebAPIClient cli = WebAPIClient() # retrieve any archived object via its SWHID cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # same, but for specific object types cli.revision('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # get() always retrieve entire objects, following pagination # WARNING: this might *not* be what you want for large objects cli.get('swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a') # type-specific methods support explicit iteration through pages next(cli.snapshot('swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764')) """ from typing import Any, Callable, Dict, Iterator, List, Optional, Union from urllib.parse import urlparse import dateutil.parser import requests from swh.model.identifiers import ( CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT, SWHID, parse_swhid, ) SWHIDish = Union[SWHID, str] ORIGIN_VISIT = "origin_visit" def _get_swhid(swhidish: SWHIDish) -> SWHID: """Parse string to SWHID if needed""" if isinstance(swhidish, str): return parse_swhid(swhidish) else: return swhidish -def typify(data: Any, obj_type: str) -> Any: +def typify_json(data: Any, obj_type: str) -> Any: """Type API responses using pythonic types where appropriate The following conversions are performed: - identifiers are converted from strings to SWHID instances - timestamps are converted from strings to datetime.datetime objects """ def to_swhid(object_type, s): return SWHID(object_type=object_type, object_id=s) def to_date(s): return dateutil.parser.parse(s) def obj_type_of_entry_type(s): if s == "file": return CONTENT elif s == "dir": return DIRECTORY elif s == "rev": return REVISION else: raise ValueError(f"invalid directory entry type: {s}") if obj_type == SNAPSHOT: for name, target in data.items(): if target["target_type"] != "alias": # alias targets do not point to objects via SWHIDs; others do target["target"] = to_swhid(target["target_type"], target["target"]) elif obj_type == REVISION: data["id"] = to_swhid(obj_type, data["id"]) data["directory"] = to_swhid(DIRECTORY, data["directory"]) for key in ("date", "committer_date"): data[key] = to_date(data[key]) for parent in data["parents"]: parent["id"] = to_swhid(REVISION, parent["id"]) elif obj_type == RELEASE: data["id"] = to_swhid(obj_type, data["id"]) data["date"] = to_date(data["date"]) data["target"] = to_swhid(data["target_type"], data["target"]) elif obj_type == DIRECTORY: dir_swhid = None for entry in data: dir_swhid = dir_swhid or to_swhid(obj_type, entry["dir_id"]) entry["dir_id"] = dir_swhid entry["target"] = to_swhid( obj_type_of_entry_type(entry["type"]), entry["target"] ) elif obj_type == CONTENT: pass # nothing to do for contents elif obj_type == ORIGIN_VISIT: data["date"] = to_date(data["date"]) if data["snapshot"] is not None: data["snapshot"] = to_swhid(SNAPSHOT, data["snapshot"]) else: raise ValueError(f"invalid object type: {obj_type}") return data class WebAPIClient: """Client for the Software Heritage archive Web API, see https://archive.softwareheritage.org/api/ """ def __init__( self, api_url: str = "https://archive.softwareheritage.org/api/1", bearer_token: Optional[str] = None, ): """Create a client for the Software Heritage Web API See: https://archive.softwareheritage.org/api/ Args: api_url: base URL for API calls (default: "https://archive.softwareheritage.org/api/1") bearer_token: optional bearer token to do authenticated API calls """ api_url = api_url.rstrip("/") u = urlparse(api_url) self.api_url = api_url self.api_path = u.path self.bearer_token = bearer_token - self._getters: Dict[str, Callable[[SWHIDish], Any]] = { + self._getters: Dict[str, Callable[[SWHIDish, bool], Any]] = { CONTENT: self.content, DIRECTORY: self.directory, RELEASE: self.release, REVISION: self.revision, SNAPSHOT: self._get_snapshot, } def _call( self, query: str, http_method: str = "get", **req_args ) -> requests.models.Response: """Dispatcher for archive API invocation Args: query: API method to be invoked, rooted at api_url http_method: HTTP method to be invoked, one of: 'get', 'head' req_args: extra keyword arguments for requests.get()/.head() Raises: requests.HTTPError: if HTTP request fails and http_method is 'get' """ url = None if urlparse(query).scheme: # absolute URL url = query else: # relative URL; prepend base API URL url = "/".join([self.api_url, query]) r = None headers = {} if self.bearer_token is not None: headers = {"Authorization": f"Bearer {self.bearer_token}"} if http_method == "get": r = requests.get(url, **req_args, headers=headers) r.raise_for_status() elif http_method == "head": r = requests.head(url, **req_args, headers=headers) else: raise ValueError(f"unsupported HTTP method: {http_method}") return r - def _get_snapshot(self, swhid: SWHIDish) -> Dict[str, Any]: + def _get_snapshot(self, swhid: SWHIDish, typify: bool = True) -> Dict[str, Any]: """Analogous to self.snapshot(), but zipping through partial snapshots, merging them together before returning """ snapshot = {} - for snp in self.snapshot(swhid): + for snp in self.snapshot(swhid, typify): snapshot.update(snp) return snapshot - def get(self, swhid: SWHIDish, **req_args) -> Any: + def get(self, swhid: SWHIDish, typify: bool = True, **req_args) -> Any: """Retrieve information about an object of any kind Dispatcher method over the more specific methods content(), directory(), etc. Note that this method will buffer the entire output in case of long, iterable output (e.g., for snapshot()), see the iter() method for streaming. """ swhid_ = _get_swhid(swhid) - return self._getters[swhid_.object_type](swhid_) + return self._getters[swhid_.object_type](swhid_, typify) - def iter(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]: + def iter( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Iterator[Dict[str, Any]]: """Stream over the information about an object of any kind Streaming variant of get() """ swhid_ = _get_swhid(swhid) obj_type = swhid_.object_type if obj_type == SNAPSHOT: - yield from self.snapshot(swhid_) + yield from self.snapshot(swhid_, typify) elif obj_type == REVISION: - yield from [self.revision(swhid_)] + yield from [self.revision(swhid_, typify)] elif obj_type == RELEASE: - yield from [self.release(swhid_)] + yield from [self.release(swhid_, typify)] elif obj_type == DIRECTORY: - yield from self.directory(swhid_) + yield from self.directory(swhid_, typify) elif obj_type == CONTENT: - yield from [self.content(swhid_)] + yield from [self.content(swhid_, typify)] else: raise ValueError(f"invalid object type: {obj_type}") - def content(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: + def content( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Dict[str, Any]: """Retrieve information about a content object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call( - f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args - ).json(), - CONTENT, - ) - - def directory(self, swhid: SWHIDish, **req_args) -> List[Dict[str, Any]]: + json = self._call( + f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args + ).json() + return typify_json(json, CONTENT) if typify else json + + def directory( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> List[Dict[str, Any]]: """Retrieve information about a directory object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call(f"directory/{_get_swhid(swhid).object_id}/", **req_args).json(), - DIRECTORY, - ) - - def revision(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: + json = self._call( + f"directory/{_get_swhid(swhid).object_id}/", **req_args + ).json() + return typify_json(json, DIRECTORY) if typify else json + + def revision( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Dict[str, Any]: """Retrieve information about a revision object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json(), - REVISION, - ) + json = self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json() + return typify_json(json, REVISION) if typify else json - def release(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: + def release( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Dict[str, Any]: """Retrieve information about a release object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json(), - RELEASE, - ) + json = self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json() + return typify_json(json, RELEASE) if typify else json - def snapshot(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]: + def snapshot( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Iterator[Dict[str, Any]]: """Retrieve information about a snapshot object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Returns: an iterator over partial snapshots (dictionaries mapping branch names to information about where they point to), each containing a subset of available branches Raises: requests.HTTPError: if HTTP request fails """ done = False r = None query = f"snapshot/{_get_swhid(swhid).object_id}/" while not done: r = self._call(query, http_method="get", **req_args) - yield typify(r.json()["branches"], SNAPSHOT) + json = r.json()["branches"] + yield typify_json(json, SNAPSHOT) if typify else json if "next" in r.links and "url" in r.links["next"]: query = r.links["next"]["url"] else: done = True def visits( self, origin: str, per_page: Optional[int] = None, last_visit: Optional[int] = None, + typify: bool = True, **req_args, ) -> Iterator[Dict[str, Any]]: """List visits of an origin Args: origin: the URL of a software origin per_page: the number of visits to list last_visit: visit to start listing from + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Returns: an iterator over visits of the origin Raises: requests.HTTPError: if HTTP request fails """ done = False r = None params = [] if last_visit is not None: params.append(("last_visit", last_visit)) if per_page is not None: params.append(("per_page", per_page)) query = f"origin/{origin}/visits/" while not done: r = self._call(query, http_method="get", params=params, **req_args) - yield from [typify(v, ORIGIN_VISIT) for v in r.json()] + yield from [typify_json(v, ORIGIN_VISIT) if typify else v for v in r.json()] if "next" in r.links and "url" in r.links["next"]: params = [] query = r.links["next"]["url"] else: done = True def content_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a content object exists in the archive Args: swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: requests.HTTPError: if HTTP request fails """ return bool( self._call( f"content/sha1_git:{_get_swhid(swhid).object_id}/", http_method="head", **req_args, ) ) def directory_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a directory object exists in the archive Args: swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: requests.HTTPError: if HTTP request fails """ return bool( self._call( f"directory/{_get_swhid(swhid).object_id}/", http_method="head", **req_args, ) ) def revision_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a revision object exists in the archive Args: swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: requests.HTTPError: if HTTP request fails """ return bool( self._call( f"revision/{_get_swhid(swhid).object_id}/", http_method="head", **req_args, ) ) def release_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a release object exists in the archive Args: swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: requests.HTTPError: if HTTP request fails """ return bool( self._call( f"release/{_get_swhid(swhid).object_id}/", http_method="head", **req_args, ) ) def snapshot_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a snapshot object exists in the archive Args: swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: requests.HTTPError: if HTTP request fails """ return bool( self._call( f"snapshot/{_get_swhid(swhid).object_id}/", http_method="head", **req_args, ) ) def content_raw(self, swhid: SWHIDish, **req_args) -> Iterator[bytes]: """Iterate over the raw content of a content object Args: swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ r = self._call( f"content/sha1_git:{_get_swhid(swhid).object_id}/raw/", stream=True, **req_args, ) r.raise_for_status() yield from r.iter_content(chunk_size=None, decode_unicode=False) diff --git a/swh/web/client/tests/test_web_api_client.py b/swh/web/client/tests/test_web_api_client.py index 082afbf..79842df 100644 --- a/swh/web/client/tests/test_web_api_client.py +++ b/swh/web/client/tests/test_web_api_client.py @@ -1,145 +1,174 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import json + from dateutil.parser import parse as parse_date from swh.model.identifiers import parse_swhid +from .api_data import API_DATA + def test_get_content(web_api_client, web_api_mock): swhid = parse_swhid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") obj = web_api_client.get(swhid) assert obj["length"] == 151810 for key in ("length", "status", "checksums", "data_url"): assert key in obj assert obj["checksums"]["sha1_git"] == str(swhid).split(":")[3] assert obj["checksums"]["sha1"] == "dc2830a9e72f23c1dfebef4413003221baa5fb62" assert obj == web_api_client.content(swhid) def test_get_directory(web_api_client, web_api_mock): swhid = parse_swhid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6") obj = web_api_client.get(swhid) assert len(obj) == 35 # number of directory entries assert all(map(lambda entry: entry["dir_id"] == swhid, obj)) dir_entry = obj[0] assert dir_entry["type"] == "file" assert dir_entry["target"] == parse_swhid( "swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814" ) assert dir_entry["name"] == ".bzrignore" assert dir_entry["length"] == 582 assert obj == web_api_client.directory(swhid) def test_get_release(web_api_client, web_api_mock): swhid = parse_swhid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342") obj = web_api_client.get(swhid) assert obj["id"] == swhid assert obj["author"]["fullname"] == "Paul Tagliamonte " assert obj["author"]["name"] == "Paul Tagliamonte" assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00") assert obj["name"] == "0.9.9" assert obj["target_type"] == "revision" assert obj["target"] == parse_swhid( "swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636" ) assert not obj["synthetic"] assert obj == web_api_client.release(swhid) def test_get_revision(web_api_client, web_api_mock): swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6") obj = web_api_client.get(swhid) assert obj["id"] == swhid for role in ("author", "committer"): assert ( obj[role]["fullname"] == "Nicolas Dandrimont " ) assert obj[role]["name"] == "Nicolas Dandrimont" timestamp = parse_date("2014-08-18T18:18:25+02:00") assert obj["date"] == timestamp assert obj["committer_date"] == timestamp assert obj["message"].startswith("Merge branch") assert obj["merge"] assert len(obj["parents"]) == 2 assert obj["parents"][0]["id"] == parse_swhid( "swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4" ) assert obj["parents"][1]["id"] == parse_swhid( "swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283" ) assert obj == web_api_client.revision(swhid) def test_get_snapshot(web_api_client, web_api_mock): # small snapshot, the one from Web API doc swhid = parse_swhid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a") obj = web_api_client.get(swhid) assert len(obj) == 4 assert obj["refs/heads/master"]["target_type"] == "revision" assert obj["refs/heads/master"]["target"] == parse_swhid( "swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3" ) assert obj["refs/tags/dpkt-1.7"]["target_type"] == "revision" assert obj["refs/tags/dpkt-1.7"]["target"] == parse_swhid( "swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8" ) def test_iter_snapshot(web_api_client, web_api_mock): # large snapshot from the Linux kernel, usually spanning two pages swhid = parse_swhid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764") obj = web_api_client.snapshot(swhid) snp = {} for partial in obj: snp.update(partial) assert len(snp) == 1391 def test_authentication(web_api_client, web_api_mock): rel_id = "b9db10d00835e9a43e2eebef2db1d04d4ae82342" url = f"{web_api_client.api_url}/release/{rel_id}/" refresh_token = "user-refresh-token" web_api_client.bearer_token = refresh_token swhid = parse_swhid(f"swh:1:rel:{rel_id}") web_api_client.get(swhid) sent_request = web_api_mock._adapter.last_request assert sent_request.url == url assert "Authorization" in sent_request.headers assert sent_request.headers["Authorization"] == f"Bearer {refresh_token}" def test_get_visits(web_api_client, web_api_mock): obj = web_api_client.visits( "https://github.com/NixOS/nixpkgs", last_visit=50, per_page=10 ) visits = [v for v in obj] assert len(visits) == 20 timestamp = parse_date("2018-07-31 04:34:23.298931+00:00") assert visits[0]["date"] == timestamp assert visits[0]["snapshot"] is None snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976" assert visits[7]["snapshot"] == parse_swhid(snapshot_swhid) + + +def test_get_json(web_api_client, web_api_mock): + swhids = [ + "swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1", + "swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6", + "swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342", + "swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6", + "swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a", + ] + + for swhid in swhids: + actual = web_api_client.get(swhid, typify=False) + expected = None + # Fetch raw JSON data from the generated API_DATA + for url, data in API_DATA.items(): + object_id = swhid[len("swh:1:XXX:") :] + if object_id in url: + expected = json.loads(data) + # Special case: snapshots response differs slightly from the Web API + if swhid.startswith("swh:1:snp:"): + expected = expected["branches"] + break + + assert actual == expected diff --git a/tox.ini b/tox.ini index a4ce93d..e4be56b 100644 --- a/tox.ini +++ b/tox.ini @@ -1,35 +1,35 @@ [tox] envlist=black,flake8,mypy,py3 [testenv] extras = testing deps = pytest-cov commands = pytest --doctest-modules \ {envsitepackagesdir}/swh/web/client \ --cov={envsitepackagesdir}/swh/web/client \ --cov-branch {posargs} [testenv:black] skip_install = true deps = - black + black==19.10b0 commands = {envpython} -m black --check swh [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 [testenv:mypy] extras = testing deps = mypy commands = mypy swh