diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py index 820fd5ad..cf618307 100644 --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -1,490 +1,497 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging import sys import tempfile import uuid import json import yaml import click import xmltodict -from swh.deposit.client import PublicApiDepositClient +from swh.deposit.client import PublicApiDepositClient, MaintenanceError from swh.deposit.cli import deposit logger = logging.getLogger(__name__) class InputError(ValueError): """Input script error """ pass def generate_slug(): """Generate a slug (sample purposes). """ return str(uuid.uuid4()) def _url(url): """Force the /1 api version at the end of the url (avoiding confusing issues without it). Args: url (str): api url used by cli users Returns: Top level api url to actually request """ if not url.endswith("/1"): url = "%s/1" % url return url def generate_metadata_file(name, external_id, authors, temp_dir): """Generate a temporary metadata file with the minimum required metadata This generates a xml file in a temporary location and returns the path to that file. This is up to the client of that function to clean up the temporary file. Args: name (str): Software's name external_id (str): External identifier (slug) or generated one authors (List[str]): List of author names Returns: Filepath to the metadata generated file """ path = os.path.join(temp_dir, "metadata.xml") # generate a metadata file with the minimum required metadata codemetadata = { "entry": { "@xmlns": "http://www.w3.org/2005/Atom", "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", "codemeta:name": name, "codemeta:identifier": external_id, "codemeta:author": [ {"codemeta:name": author_name} for author_name in authors ], }, } logging.debug("Temporary file: %s", path) logging.debug("Metadata dict to generate as xml: %s", codemetadata) s = xmltodict.unparse(codemetadata, pretty=True) logging.debug("Metadata dict as xml generated: %s", s) with open(path, "w") as fp: fp.write(s) return path def _client(url, username, password): """Instantiate a client to access the deposit api server Args: url (str): Deposit api server username (str): User password (str): User's password """ client = PublicApiDepositClient( {"url": url, "auth": {"username": username, "password": password},} ) return client def _collection(client): """Retrieve the client's collection """ # retrieve user's collection sd_content = client.service_document() if "error" in sd_content: raise InputError("Service document retrieval: %s" % (sd_content["error"],)) collection = sd_content["service"]["workspace"]["collection"]["sword:name"] return collection def client_command_parse_input( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, name, authors, temp_dir, ): """Parse the client subcommand options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) requires: - an existing software archive - an existing metadata file or author(s) and name provided in params - A binary deposit (create/update) requires an existing software archive - A metadata deposit (create/update) requires an existing metadata file or author(s) and name provided in params - A deposit update requires a deposit_id This will not prevent all failure cases though. The remaining errors are already dealt with by the underlying api client. Raises: - InputError explaining the issue + InputError explaining the user input related issue + MaintenanceError explaining the api status Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated client 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier """ if archive_deposit and metadata_deposit: # too many flags use, remove redundant ones (-> multipart deposit) archive_deposit = False metadata_deposit = False if not slug: # generate one as this is mandatory slug = generate_slug() if not metadata: if name and authors: metadata = generate_metadata_file(name, slug, authors, temp_dir) elif not archive_deposit and not partial and not deposit_id: # If we meet all the following conditions: # * there is not an archive-only deposit # * it is not part of a multipart deposit (either create/update # or finish) # * it misses either name or authors raise InputError( "Either a metadata file (--metadata) or both --author and " "--name must be provided, unless this is an archive-only " "deposit." ) elif name or authors: # If we are generating metadata, then all mandatory metadata # must be present raise InputError( "Either a metadata file (--metadata) or both --author and " "--name must be provided." ) else: # TODO: this is a multipart deposit, we might want to check that # metadata are deposited at some point pass elif name or authors: raise InputError( "Using a metadata file (--metadata) is incompatible with " "--author and --name, which are used to generate one." ) if metadata_deposit: archive = None if archive_deposit: metadata = None if metadata_deposit and not metadata: raise InputError( "Metadata deposit must be provided for metadata " "deposit (either a filepath or --name and --author)" ) if not archive and not metadata and partial: raise InputError( "Please provide an actionable command. See --help for more " "information" ) if replace and not deposit_id: raise InputError("To update an existing deposit, you must provide its id") client = _client(url, username, password) if not collection: collection = _collection(client) return { "archive": archive, "username": username, "password": password, "metadata": metadata, "collection": collection, "slug": slug, "in_progress": partial, "client": client, "url": url, "deposit_id": deposit_id, "replace": replace, } def _subdict(d, keys): "return a dict from d with only given keys" return {k: v for k, v in d.items() if k in keys} def deposit_create(config, logger): """Delegate the actual deposit to the deposit client. """ logger.debug("Create deposit") client = config["client"] keys = ("collection", "archive", "metadata", "slug", "in_progress") return client.deposit_create(**_subdict(config, keys)) def deposit_update(config, logger): """Delegate the actual deposit to the deposit client. """ logger.debug("Update deposit") client = config["client"] keys = ( "collection", "deposit_id", "archive", "metadata", "slug", "in_progress", "replace", ) return client.deposit_update(**_subdict(config, keys)) @deposit.command() @click.option("--username", required=True, help="(Mandatory) User's name") @click.option( "--password", required=True, help="(Mandatory) User's associated password" ) @click.option( "--archive", type=click.Path(exists=True), help="(Optional) Software archive to deposit", ) @click.option( "--metadata", type=click.Path(exists=True), help=( "(Optional) Path to xml metadata file. If not provided, " "this will use a file named .metadata.xml" ), ) # noqa @click.option( "--archive-deposit/--no-archive-deposit", default=False, help="(Optional) Software archive only deposit", ) @click.option( "--metadata-deposit/--no-metadata-deposit", default=False, help="(Optional) Metadata only deposit", ) @click.option( "--collection", help="(Optional) User's collection. If not provided, this will be fetched.", ) # noqa @click.option( "--slug", help=( "(Optional) External system information identifier. " "If not provided, it will be generated" ), ) # noqa @click.option( "--partial/--no-partial", default=False, help=( "(Optional) The deposit will be partial, other deposits " "will have to take place to finalize it." ), ) # noqa @click.option( "--deposit-id", default=None, help="(Optional) Update an existing partial deposit with its identifier", ) # noqa @click.option( "--replace/--no-replace", default=False, help="(Optional) Update by replacing existing metadata to a deposit", ) # noqa @click.option( "--url", default="https://deposit.softwareheritage.org", help=( "(Optional) Deposit server api endpoint. By default, " "https://deposit.softwareheritage.org/1" ), ) # noqa @click.option("--verbose/--no-verbose", default=False, help="Verbose mode") @click.option("--name", help="Software name") @click.option( "--author", multiple=True, help="Software author(s), this can be repeated as many times" " as there are authors", ) @click.option( "-f", "--format", "output_format", default="logging", type=click.Choice(["logging", "yaml", "json"]), help="Output format results.", ) @click.pass_context def upload( ctx, username, password, archive=None, metadata=None, archive_deposit=False, metadata_deposit=False, collection=None, slug=None, partial=False, deposit_id=None, replace=False, url="https://deposit.softwareheritage.org", verbose=False, name=None, author=None, output_format=None, ): """Software Heritage Public Deposit Client Create/Update deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ url = _url(url) config = {} with tempfile.TemporaryDirectory() as temp_dir: try: logger.debug("Parsing cli options") config = client_command_parse_input( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, name, author, temp_dir, ) except InputError as e: logger.error("Problem during parsing options: %s", e) sys.exit(1) + except MaintenanceError as e: + logger.error(e) + sys.exit(1) if verbose: logger.info("Parsed configuration: %s" % (config,)) deposit_id = config["deposit_id"] if deposit_id: r = deposit_update(config, logger) else: r = deposit_create(config, logger) print_result(r, output_format) @deposit.command() @click.option( "--url", default="https://deposit.softwareheritage.org", help="(Optional) Deposit server api endpoint. By default, " "https://deposit.softwareheritage.org/1", ) @click.option("--username", required=True, help="(Mandatory) User's name") @click.option( "--password", required=True, help="(Mandatory) User's associated password" ) @click.option("--deposit-id", default=None, required=True, help="Deposit identifier.") @click.option( "-f", "--format", "output_format", default="logging", type=click.Choice(["logging", "yaml", "json"]), help="Output format results.", ) @click.pass_context def status(ctx, url, username, password, deposit_id, output_format): """Deposit's status """ url = _url(url) logger.debug("Status deposit") try: client = _client(url, username, password) collection = _collection(client) except InputError as e: logger.error("Problem during parsing options: %s", e) sys.exit(1) + except MaintenanceError as e: + logger.error(e) + sys.exit(1) print_result( client.deposit_status(collection=collection, deposit_id=deposit_id), output_format, ) def print_result(data, output_format): if output_format == "json": click.echo(json.dumps(data)) elif output_format == "yaml": click.echo(yaml.dump(data)) else: logger.info(data) diff --git a/swh/deposit/client.py b/swh/deposit/client.py index 3ac6d732..3e638795 100644 --- a/swh/deposit/client.py +++ b/swh/deposit/client.py @@ -1,639 +1,660 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining an swh-deposit client """ import hashlib import os import requests import xmltodict import logging from abc import ABCMeta, abstractmethod +from typing import Any, Dict from urllib.parse import urljoin from swh.core.config import SWHConfig logger = logging.getLogger(__name__) +class MaintenanceError(ValueError): + """Informational maintenance error exception + + """ + + pass + + def _parse(stream, encoding="utf-8"): """Given a xml stream, parse the result. Args: stream (bytes/text): The stream to parse encoding (str): The encoding to use if to decode the bytes stream Returns: A dict of values corresponding to the parsed xml """ if isinstance(stream, bytes): stream = stream.decode(encoding) data = xmltodict.parse(stream, encoding=encoding, process_namespaces=False) if "entry" in data: data = data["entry"] if "sword:error" in data: data = data["sword:error"] return dict(data) def _parse_with_filter(stream, encoding="utf-8", keys=[]): """Given a xml stream, parse the result and filter with keys. Args: stream (bytes/text): The stream to parse encoding (str): The encoding to use if to decode the bytes stream keys ([str]): Keys to filter the parsed result Returns: A dict of values corresponding to the parsed xml filtered by the keys provided. """ data = _parse(stream, encoding=encoding) m = {} for key in keys: m[key] = data.get(key) return m class BaseApiDepositClient(SWHConfig): """Deposit client base class """ CONFIG_BASE_FILENAME = "deposit/client" DEFAULT_CONFIG = { "url": ("str", "http://localhost:5006"), "auth": ("dict", {}), # with optional 'username'/'password' keys } def __init__(self, config=None, _client=requests): super().__init__() if config is None: self.config = super().parse_config_file() else: self.config = config self._client = _client self.base_url = self.config["url"].strip("/") + "/" auth = self.config["auth"] if auth == {}: self.auth = None else: self.auth = (auth["username"], auth["password"]) def do(self, method, url, *args, **kwargs): """Internal method to deal with requests, possibly with basic http authentication. Args: method (str): supported http methods as in self._methods' keys Returns: The request's execution """ if hasattr(self._client, method): method_fn = getattr(self._client, method) else: raise ValueError("Development error, unsupported method %s" % (method)) if self.auth: kwargs["auth"] = self.auth full_url = urljoin(self.base_url, url.lstrip("/")) return method_fn(full_url, *args, **kwargs) class PrivateApiDepositClient(BaseApiDepositClient): """Private API deposit client to: - read a given deposit's archive(s) - read a given deposit's metadata - update a given deposit's status """ def archive_get(self, archive_update_url, archive): """Retrieve the archive from the deposit to a local directory. Args: archive_update_url (str): The full deposit archive(s)'s raw content to retrieve locally archive (str): the local archive's path where to store the raw content Returns: The archive path to the local archive to load. Or None if any problem arose. """ r = self.do("get", archive_update_url, stream=True) if r.ok: with open(archive, "wb") as f: for chunk in r.iter_content(): f.write(chunk) return archive msg = "Problem when retrieving deposit archive at %s" % (archive_update_url,) logger.error(msg) raise ValueError(msg) def metadata_get(self, metadata_url): """Retrieve the metadata information on a given deposit. Args: metadata_url (str): The full deposit metadata url to retrieve locally Returns: The dictionary of metadata for that deposit or None if any problem arose. """ r = self.do("get", metadata_url) if r.ok: return r.json() msg = "Problem when retrieving metadata at %s" % metadata_url logger.error(msg) raise ValueError(msg) def status_update( self, update_status_url, status, revision_id=None, directory_id=None, origin_url=None, ): """Update the deposit's status. Args: update_status_url (str): the full deposit's archive status (str): The status to update the deposit with revision_id (str/None): the revision's identifier to update to directory_id (str/None): the directory's identifier to update to origin_url (str/None): deposit's associated origin url """ payload = {"status": status} if revision_id: payload["revision_id"] = revision_id if directory_id: payload["directory_id"] = directory_id if origin_url: payload["origin_url"] = origin_url self.do("put", update_status_url, json=payload) def check(self, check_url): """Check the deposit's associated data (metadata, archive(s)) Args: check_url (str): the full deposit's check url """ r = self.do("get", check_url) if r.ok: data = r.json() return data["status"] msg = "Problem when checking deposit %s" % check_url logger.error(msg) raise ValueError(msg) class BaseDepositClient(BaseApiDepositClient, metaclass=ABCMeta): """Base Deposit client to access the public api. """ def __init__(self, config, error_msg=None, empty_result={}): super().__init__(config) self.error_msg = error_msg self.empty_result = empty_result @abstractmethod def compute_url(self, *args, **kwargs): """Compute api url endpoint to query.""" pass @abstractmethod def compute_method(self, *args, **kwargs): """Http method to use on the url""" pass @abstractmethod def parse_result_ok(self, xml_content): """Given an xml result from the api endpoint, parse it and returns a dict. """ pass def compute_information(self, *args, **kwargs): """Compute some more information given the inputs (e.g http headers, ...) """ return {} def parse_result_error(self, xml_content): """Given an error response in xml, parse it into a dict. Returns: dict with following keys: 'error': The error message 'detail': Some more detail about the error if any """ return _parse_with_filter( xml_content, keys=["summary", "detail", "sword:verboseDescription"] ) def do_execute(self, method, url, info): """Execute the http query to url using method and info information. By default, execute a simple query to url with the http method. Override this in daughter class to improve the default behavior if needed. """ return self.do(method, url) - def execute(self, *args, **kwargs): + def execute(self, *args, **kwargs) -> Dict[str, Any]: """Main endpoint to prepare and execute the http query to the api. + Raises: + MaintenanceError if some api maintenance is happening. + + Returns: + Dict of computed api data + """ url = self.compute_url(*args, **kwargs) method = self.compute_method(*args, **kwargs) info = self.compute_information(*args, **kwargs) try: r = self.do_execute(method, url, info) except Exception as e: msg = self.error_msg % (url, e) r = self.empty_result r.update( {"error": msg,} ) return r else: if r.ok: if int(r.status_code) == 204: # 204 returns no body return {"status": r.status_code} else: return self.parse_result_ok(r.text) else: error = self.parse_result_error(r.text) empty = self.empty_result error.update(empty) + if r.status_code == 503: + summary = error.get("summary") + detail = error.get("sword:verboseDescription") + # Maintenance error + if summary and detail: + raise MaintenanceError(f"{summary}: {detail}") error.update( {"status": r.status_code,} ) return error class ServiceDocumentDepositClient(BaseDepositClient): """Service Document information retrieval. """ def __init__(self, config): super().__init__( config, error_msg="Service document failure at %s: %s", empty_result={"collection": None}, ) def compute_url(self, *args, **kwargs): return "/servicedocument/" def compute_method(self, *args, **kwargs): return "get" def parse_result_ok(self, xml_content): """Parse service document's success response. """ return _parse(xml_content) class StatusDepositClient(BaseDepositClient): """Status information on a deposit. """ def __init__(self, config): super().__init__( config, error_msg="Status check failure at %s: %s", empty_result={ "deposit_status": None, "deposit_status_detail": None, "deposit_swh_id": None, }, ) def compute_url(self, collection, deposit_id): return "/%s/%s/status/" % (collection, deposit_id) def compute_method(self, *args, **kwargs): return "get" def parse_result_ok(self, xml_content): """Given an xml content as string, returns a deposit dict. """ return _parse_with_filter( xml_content, keys=[ "deposit_id", "deposit_status", "deposit_status_detail", "deposit_swh_id", "deposit_swh_id_context", "deposit_swh_anchor_id", "deposit_swh_anchor_id_context", "deposit_external_id", ], ) class BaseCreateDepositClient(BaseDepositClient): """Deposit client base class to post new deposit. """ def __init__(self, config): super().__init__( config, error_msg="Post Deposit failure at %s: %s", empty_result={"deposit_id": None, "deposit_status": None,}, ) def compute_url(self, collection, *args, **kwargs): return "/%s/" % collection def compute_method(self, *args, **kwargs): return "post" def parse_result_ok(self, xml_content): """Given an xml content as string, returns a deposit dict. """ return _parse_with_filter( xml_content, keys=[ "deposit_id", "deposit_status", "deposit_status_detail", "deposit_date", ], ) def _compute_information( self, collection, filepath, in_progress, slug, is_archive=True ): """Given a filepath, compute necessary information on that file. Args: filepath (str): Path to a file is_archive (bool): is it an archive or not? Returns: dict with keys: 'content-type': content type associated 'md5sum': md5 sum 'filename': filename """ filename = os.path.basename(filepath) if is_archive: md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest() extension = filename.split(".")[-1] if "zip" in extension: content_type = "application/zip" else: content_type = "application/x-tar" else: content_type = None md5sum = None return { "slug": slug, "in_progress": in_progress, "content-type": content_type, "md5sum": md5sum, "filename": filename, "filepath": filepath, } def compute_information( self, collection, filepath, in_progress, slug, is_archive=True, **kwargs ): info = self._compute_information( collection, filepath, in_progress, slug, is_archive=is_archive ) info["headers"] = self.compute_headers(info) return info def do_execute(self, method, url, info): with open(info["filepath"], "rb") as f: return self.do(method, url, data=f, headers=info["headers"]) class CreateArchiveDepositClient(BaseCreateDepositClient): """Post an archive (binary) deposit client.""" def compute_headers(self, info): return { "SLUG": info["slug"], "CONTENT_MD5": info["md5sum"], "IN-PROGRESS": str(info["in_progress"]), "CONTENT-TYPE": info["content-type"], "CONTENT-DISPOSITION": "attachment; filename=%s" % (info["filename"],), } class UpdateArchiveDepositClient(CreateArchiveDepositClient): """Update (add/replace) an archive (binary) deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return "/%s/%s/media/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return "put" if replace else "post" class CreateMetadataDepositClient(BaseCreateDepositClient): """Post a metadata deposit client.""" def compute_headers(self, info): return { "SLUG": info["slug"], "IN-PROGRESS": str(info["in_progress"]), "CONTENT-TYPE": "application/atom+xml;type=entry", } class UpdateMetadataDepositClient(CreateMetadataDepositClient): """Update (add/replace) a metadata deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return "/%s/%s/metadata/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return "put" if replace else "post" class CreateMultipartDepositClient(BaseCreateDepositClient): """Create a multipart deposit client.""" def _multipart_info(self, info, info_meta): files = [ ( "file", (info["filename"], open(info["filepath"], "rb"), info["content-type"]), ), ( "atom", ( info_meta["filename"], open(info_meta["filepath"], "rb"), "application/atom+xml", ), ), ] headers = { "SLUG": info["slug"], "CONTENT_MD5": info["md5sum"], "IN-PROGRESS": str(info["in_progress"]), } return files, headers def compute_information( self, collection, archive, metadata, in_progress, slug, **kwargs ): info = self._compute_information(collection, archive, in_progress, slug) info_meta = self._compute_information( collection, metadata, in_progress, slug, is_archive=False ) files, headers = self._multipart_info(info, info_meta) return {"files": files, "headers": headers} def do_execute(self, method, url, info): return self.do(method, url, files=info["files"], headers=info["headers"]) class UpdateMultipartDepositClient(CreateMultipartDepositClient): """Update a multipart deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return "/%s/%s/metadata/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return "put" if replace else "post" class PublicApiDepositClient(BaseApiDepositClient): """Public api deposit client.""" def service_document(self): """Retrieve service document endpoint's information.""" return ServiceDocumentDepositClient(self.config).execute() def deposit_status(self, collection, deposit_id): """Retrieve status information on a deposit.""" return StatusDepositClient(self.config).execute(collection, deposit_id) def deposit_create( self, collection, slug, archive=None, metadata=None, in_progress=False ): """Create a new deposit (archive, metadata, both as multipart).""" if archive and not metadata: return CreateArchiveDepositClient(self.config).execute( collection, archive, in_progress, slug ) elif not archive and metadata: return CreateMetadataDepositClient(self.config).execute( collection, metadata, in_progress, slug, is_archive=False ) else: return CreateMultipartDepositClient(self.config).execute( collection, archive, metadata, in_progress, slug ) def deposit_update( self, collection, deposit_id, slug, archive=None, metadata=None, in_progress=False, replace=False, ): """Update (add/replace) existing deposit (archive, metadata, both).""" r = self.deposit_status(collection, deposit_id) if "error" in r: return r status = r["deposit_status"] if status != "partial": return { "error": "You can only act on deposit with status 'partial'", "detail": "The deposit %s has status '%s'" % (deposit_id, status), "deposit_status": status, "deposit_id": deposit_id, } if archive and not metadata: r = UpdateArchiveDepositClient(self.config).execute( collection, archive, in_progress, slug, deposit_id=deposit_id, replace=replace, ) elif not archive and metadata: r = UpdateMetadataDepositClient(self.config).execute( collection, metadata, in_progress, slug, deposit_id=deposit_id, replace=replace, ) else: r = UpdateMultipartDepositClient(self.config).execute( collection, archive, metadata, in_progress, slug, deposit_id=deposit_id, replace=replace, ) if "error" in r: return r return self.deposit_status(collection, deposit_id) diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py index c788afeb..3cfcc2b2 100644 --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -1,468 +1,457 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import logging import os import re from unittest.mock import MagicMock from click.testing import CliRunner import pytest -from swh.deposit.client import PublicApiDepositClient +from swh.deposit.client import PublicApiDepositClient, MaintenanceError from swh.deposit.cli.client import generate_slug, _url, _client, _collection, InputError from swh.deposit.cli import deposit as cli from ..conftest import TEST_USER EXAMPLE_SERVICE_DOCUMENT = { "service": {"workspace": {"collection": {"sword:name": "softcol",}}} } @pytest.fixture def slug(): return generate_slug() @pytest.fixture def client_mock(mocker, slug): """A succesfull deposit client with hard-coded default values """ mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) mock_client = MagicMock() mocker.patch("swh.deposit.cli.client._client", return_value=mock_client) mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT mock_client.deposit_create.return_value = '{"foo": "bar"}' return mock_client @pytest.fixture -def client_mock_down(mocker, slug): +def client_mock_api_down(mocker, slug): + """A mock client whose connection with api fails due to maintenance issue + + """ mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) mock_client = MagicMock() mocker.patch("swh.deposit.cli.client._client", return_value=mock_client) - mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT - mock_client.deposit_create.return_value = '{"foo": "bar"}' + mock_client.service_document.side_effect = MaintenanceError( + "Database backend maintenance: Temporarily unavailable, try again later." + ) return mock_client def test_url(): assert _url("http://deposit") == "http://deposit/1" assert _url("https://other/1") == "https://other/1" def test_client(): client = _client("http://deposit", "user", "pass") assert isinstance(client, PublicApiDepositClient) def test_collection_error(): mock_client = MagicMock() mock_client.service_document.return_value = {"error": "something went wrong"} with pytest.raises(InputError) as e: _collection(mock_client) assert "Service document retrieval: something went wrong" == str(e.value) def test_collection_ok(): mock_client = MagicMock() mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT collection_name = _collection(mock_client) assert collection_name == "softcol" -def test_deposit_with_server_ok_backend_down( - sample_archive, mocker, caplog, client_mock, slug, tmp_path +def test_collection_ko_because_downtime(): + mock_client = MagicMock() + mock_client.service_document.side_effect = MaintenanceError("downtime") + with pytest.raises(MaintenanceError, match="downtime"): + _collection(mock_client) + + +def test_deposit_with_server_down_for_maintenance( + sample_archive, mocker, caplog, client_mock_api_down, slug, tmp_path ): - """ Deposit failure due to maintenance down time should be explicit in error msg - """ - metadata_path = os.path.join(tmp_path, "metadata.xml") - mocker.patch( - "swh.deposit.cli.client.tempfile.TemporaryDirectory", - return_value=contextlib.nullcontext(str(tmp_path)), - ) + """ Deposit failure due to maintenance down time should be explicit + """ runner = CliRunner() result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--author", "Jane Doe", ], ) - assert result.exit_code == 0, result.output + assert result.exit_code == 1, result.output assert result.output == "" assert caplog.record_tuples == [ - ("swh.deposit.cli.client", logging.INFO, '{"foo": "bar"}'), + ( + "swh.deposit.cli.client", + logging.ERROR, + "Database backend maintenance: Temporarily unavailable, try again later.", + ) ] - client_mock.deposit_create.assert_called_once_with( - archive=sample_archive["path"], - collection="softcol", - in_progress=False, - metadata=metadata_path, - slug=slug, - ) - - with open(metadata_path) as fd: - assert ( - fd.read() - == f"""\ - - -\ttest-project -\t{slug} -\t -\t\tJane Doe -\t -""" - ) + client_mock_api_down.service_document.assert_called_once_with() def test_single_minimal_deposit( sample_archive, mocker, caplog, client_mock, slug, tmp_path ): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa metadata_path = os.path.join(tmp_path, "metadata.xml") mocker.patch( "swh.deposit.cli.client.tempfile.TemporaryDirectory", return_value=contextlib.nullcontext(str(tmp_path)), ) runner = CliRunner() result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--author", "Jane Doe", ], ) assert result.exit_code == 0, result.output assert result.output == "" assert caplog.record_tuples == [ ("swh.deposit.cli.client", logging.INFO, '{"foo": "bar"}'), ] client_mock.deposit_create.assert_called_once_with( archive=sample_archive["path"], collection="softcol", in_progress=False, metadata=metadata_path, slug=slug, ) with open(metadata_path) as fd: assert ( fd.read() == f"""\ \ttest-project \t{slug} \t \t\tJane Doe \t """ ) def test_metadata_validation(sample_archive, mocker, caplog, tmp_path): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa slug = generate_slug() mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) mock_client = MagicMock() mocker.patch("swh.deposit.cli.client._client", return_value=mock_client) mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT mock_client.deposit_create.return_value = '{"foo": "bar"}' metadata_path = os.path.join(tmp_path, "metadata.xml") mocker.patch( "swh.deposit.cli.client.tempfile.TemporaryDirectory", return_value=contextlib.nullcontext(str(tmp_path)), ) with open(metadata_path, "a"): pass # creates the file runner = CliRunner() # Test missing author result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], ], ) assert result.exit_code == 1, result.output assert result.output == "" assert len(caplog.record_tuples) == 1 (_logger, level, message) = caplog.record_tuples[0] assert level == logging.ERROR assert " --author " in message # Clear mocking state caplog.clear() mock_client.reset_mock() # Test missing name result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--archive", sample_archive["path"], "--author", "Jane Doe", ], ) assert result.exit_code == 1, result.output assert result.output == "" assert len(caplog.record_tuples) == 1 (_logger, level, message) = caplog.record_tuples[0] assert level == logging.ERROR assert " --name " in message # Clear mocking state caplog.clear() mock_client.reset_mock() # Test both --metadata and --author result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--archive", sample_archive["path"], "--metadata", metadata_path, "--author", "Jane Doe", ], ) assert result.exit_code == 1, result.output assert result.output == "" assert len(caplog.record_tuples) == 1 (_logger, level, message) = caplog.record_tuples[0] assert level == logging.ERROR assert re.search("--metadata.*is incompatible with", message) # Clear mocking state caplog.clear() mock_client.reset_mock() def test_single_deposit_slug_generation( sample_archive, mocker, caplog, tmp_path, client_mock ): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa slug = "my-slug" collection = "my-collection" metadata_path = os.path.join(tmp_path, "metadata.xml") mocker.patch( "swh.deposit.cli.client.tempfile.TemporaryDirectory", return_value=contextlib.nullcontext(str(tmp_path)), ) runner = CliRunner() result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--slug", slug, "--collection", collection, "--author", "Jane Doe", ], ) assert result.exit_code == 0, result.output assert result.output == "" assert caplog.record_tuples == [ ("swh.deposit.cli.client", logging.INFO, '{"foo": "bar"}'), ] client_mock.deposit_create.assert_called_once_with( archive=sample_archive["path"], collection=collection, in_progress=False, metadata=metadata_path, slug=slug, ) with open(metadata_path) as fd: assert ( fd.read() == """\ \ttest-project \tmy-slug \t \t\tJane Doe \t """ ) def test_multisteps_deposit( sample_archive, atom_dataset, mocker, caplog, datadir, client_mock, slug ): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit """ # noqa slug = generate_slug() mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#create-an-incomplete-deposit client_mock.deposit_create.return_value = '{"deposit_id": "42"}' runner = CliRunner() result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--archive", sample_archive["path"], "--partial", ], ) assert result.exit_code == 0, result.output assert result.output == "" assert caplog.record_tuples == [ ("swh.deposit.cli.client", logging.INFO, '{"deposit_id": "42"}'), ] client_mock.deposit_create.assert_called_once_with( archive=sample_archive["path"], collection="softcol", in_progress=True, metadata=None, slug=slug, ) # Clear mocking state caplog.clear() client_mock.reset_mock() # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml") result = runner.invoke( cli, [ "upload", "--url", "mock://deposit.swh/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, ], ) assert result.exit_code == 0, result.output assert result.output == "" assert caplog.record_tuples == [ ("swh.deposit.cli.client", logging.INFO, '{"deposit_id": "42"}'), ] client_mock.deposit_create.assert_called_once_with( archive=None, collection="softcol", in_progress=False, metadata=metadata_path, slug=slug, ) # Clear mocking state caplog.clear() client_mock.reset_mock()