# ---------------------------------------------------------------------------
# swh/deposit/cli/client.py
# ---------------------------------------------------------------------------
# Copyright (C) 2017-2019  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os
import logging
import sys
import tempfile
import uuid

import click
import xmltodict

from swh.deposit.client import PublicApiDepositClient
from swh.deposit.cli import deposit


logger = logging.getLogger(__name__)


class InputError(ValueError):
    """Input script error

    """
    pass


def generate_slug():
    """Generate a slug (sample purposes).

    """
    return str(uuid.uuid4())


def _url(url):
    """Force the /1 api version at the end of the url (avoiding confusing
       issues without it).

    Trailing slashes are dropped first, so that ``https://host/`` and
    ``https://host/1/`` both resolve to ``https://host/1`` instead of
    ``https://host//1`` and ``https://host/1//1``.

    Args:
        url (str): api url used by cli users

    Returns:
        Top level api url to actually request

    """
    url = url.rstrip('/')
    if not url.endswith('/1'):
        url = '%s/1' % url
    return url


def generate_metadata_file(name, external_id, authors, temp_dir):
    """Generate a temporary metadata file with the minimum required metadata

    This generates a xml file named ``metadata.xml`` in ``temp_dir`` and
    returns the path to that file. This is up to the client of that
    function to clean up the temporary file.

    Args:
        name (str): Software's name
        external_id (str): External identifier (slug) or generated one
        authors (List[str]): List of author names
        temp_dir (str): Existing directory in which the file is written

    Returns:
        Filepath to the metadata generated file

    """
    path = os.path.join(temp_dir, 'metadata.xml')
    # generate a metadata file with the minimum required metadata
    codemetadata = {
        'entry': {
            '@xmlns': "http://www.w3.org/2005/Atom",
            '@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
            'codemeta:name': name,
            'codemeta:identifier': external_id,
            'codemeta:author': [{
                'codemeta:name': author_name
            } for author_name in authors],
        },
    }

    # Use the module logger (not the root one), like the rest of this module
    logger.debug('Temporary file: %s', path)
    logger.debug('Metadata dict to generate as xml: %s', codemetadata)
    s = xmltodict.unparse(codemetadata, pretty=True)
    logger.debug('Metadata dict as xml generated: %s', s)
    # The xml declaration emitted by xmltodict states utf-8, so the file must
    # be written as utf-8 whatever the locale's default encoding is.
    with open(path, 'w', encoding='utf-8') as fp:
        fp.write(s)
    return path


def _client(url, username, password):
    """Instantiate a client to access the deposit api server

    Args:
        url (str): Deposit api server
        username (str): User
        password (str): User's password

    Returns:
        The instantiated PublicApiDepositClient

    """
    client = PublicApiDepositClient({
        'url': url,
        'auth': {
            'username': username,
            'password': password
        },
    })
    return client


def _collection(client):
    """Retrieve the client's collection

    Args:
        client: deposit client, as returned by :func:`_client`

    Raises:
        InputError if the service document reports an error

    Returns:
        The collection name read from the service document

    """
    # retrieve user's collection
    sd_content = client.service_document()
    if 'error' in sd_content:
        raise InputError('Service document retrieval: %s' % (
            sd_content['error'], ))
    collection = sd_content[
        'service']['workspace']['collection']['sword:name']
    return collection


def client_command_parse_input(
        username, password, archive, metadata,
        archive_deposit, metadata_deposit,
        collection, slug, partial, deposit_id, replace,
        url, name, authors, temp_dir):
    """Parse the client subcommand options and make sure the combination
       is acceptable.  If not, an InputError exception is raised
       explaining the issue.

       By acceptable, we mean:

           - A multipart deposit (create or update) requires:

             - an existing software archive
             - an existing metadata file or author(s) and name provided in
               params

           - A binary deposit (create/update) requires an existing software
             archive

           - A metadata deposit (create/update) requires an existing metadata
             file or author(s) and name provided in params

           - A deposit update requires a deposit_id

        This will not prevent all failure cases though. The remaining
        errors are already dealt with by the underlying api client.

    Raises:
        InputError explaining the issue

    Returns:
        dict with the following keys:

            'archive': the software archive to deposit
            'username': username
            'password': associated password
            'metadata': the metadata file to deposit
            'collection': the username's associated client
            'slug': the slug or external id identifying the deposit to make
            'in_progress': if the deposit is partial or not
            'client': instantiated class
            'url': deposit's server main entry point
            'deposit_id': optional deposit identifier
            'replace': whether an update replaces the existing deposit data

    """
    if archive_deposit and metadata_deposit:
        # too many flags use, remove redundant ones (-> multipart deposit)
        archive_deposit = False
        metadata_deposit = False

    if not slug:  # generate one as this is mandatory
        slug = generate_slug()

    if not metadata:
        if name and authors:
            metadata = generate_metadata_file(name, slug, authors, temp_dir)
        elif not archive_deposit and not partial and not deposit_id:
            # If we meet all the following conditions:
            # * there is not an archive-only deposit
            # * it is not part of a multipart deposit (either create/update
            #   or finish)
            # * it misses either name or authors
            raise InputError(
                "Either a metadata file (--metadata) or both --author and "
                "--name must be provided, unless this is an archive-only "
                "deposit.")
        elif name or authors:
            # If we are generating metadata, then all mandatory metadata
            # must be present
            raise InputError(
                "Either a metadata file (--metadata) or both --author and "
                "--name must be provided.")
        else:
            # TODO: this is a multipart deposit, we might want to check that
            # metadata are deposited at some point
            pass
    elif name or authors:
        raise InputError(
            "Using a metadata file (--metadata) is incompatible with "
            "--author and --name, which are used to generate one.")

    if metadata_deposit:
        archive = None

    if archive_deposit:
        metadata = None

    if metadata_deposit and not metadata:
        raise InputError(
            "Metadata deposit must be provided for metadata "
            "deposit (either a filepath or --name and --author)")

    if not archive and not metadata and partial:
        raise InputError(
            'Please provide an actionable command. See --help for more '
            'information')

    if replace and not deposit_id:
        raise InputError(
            'To update an existing deposit, you must provide its id')

    client = _client(url, username, password)

    if not collection:
        collection = _collection(client)

    return {
        'archive': archive,
        'username': username,
        'password': password,
        'metadata': metadata,
        'collection': collection,
        'slug': slug,
        'in_progress': partial,
        'client': client,
        'url': url,
        'deposit_id': deposit_id,
        'replace': replace,
    }


def _subdict(d, keys):
    'return a dict from d with only given keys'
    return {k: v for k, v in d.items() if k in keys}


def deposit_create(config, logger):
    """Delegate the actual deposit to the deposit client.

    """
    logger.debug('Create deposit')

    client = config['client']
    keys = ('collection', 'archive', 'metadata', 'slug', 'in_progress')
    return client.deposit_create(
        **_subdict(config, keys))


def deposit_update(config, logger):
    """Delegate the actual deposit to the deposit client.

    """
    logger.debug('Update deposit')

    client = config['client']
    keys = ('collection', 'deposit_id', 'archive', 'metadata',
            'slug', 'in_progress', 'replace')
    return client.deposit_update(
        **_subdict(config, keys))


@deposit.command()
@click.option('--username', required=True,
              help="(Mandatory) User's name")
@click.option('--password', required=True,
              help="(Mandatory) User's associated password")
@click.option('--archive', type=click.Path(exists=True),
              help='(Optional) Software archive to deposit')
@click.option('--metadata', type=click.Path(exists=True),
              help="(Optional) Path to xml metadata file. If not provided, "
                   "it is generated from --name and --author")
@click.option('--archive-deposit/--no-archive-deposit', default=False,
              help='(Optional) Software archive only deposit')
@click.option('--metadata-deposit/--no-metadata-deposit', default=False,
              help='(Optional) Metadata only deposit')
@click.option('--collection',
              help="(Optional) User's collection. If not provided, this "
                   "will be fetched.")
@click.option('--slug',
              help="(Optional) External system information identifier. "
                   "If not provided, it will be generated")
@click.option('--partial/--no-partial', default=False,
              help='(Optional) The deposit will be partial, other deposits '
                   'will have to take place to finalize it.')
@click.option('--deposit-id', default=None,
              help='(Optional) Update an existing partial deposit with its '
                   'identifier')
@click.option('--replace/--no-replace', default=False,
              help='(Optional) Update by replacing existing metadata to a '
                   'deposit')
@click.option('--url', default='https://deposit.softwareheritage.org',
              help="(Optional) Deposit server api endpoint. By default, "
                   "https://deposit.softwareheritage.org/1")
@click.option('--verbose/--no-verbose', default=False,
              help='Verbose mode')
@click.option('--name',
              help='Software name')
@click.option('--author', multiple=True,
              help='Software author(s), this can be repeated as many times'
                   ' as there are authors')
@click.pass_context
def upload(ctx,
           username, password, archive=None, metadata=None,
           archive_deposit=False, metadata_deposit=False,
           collection=None, slug=None, partial=False, deposit_id=None,
           replace=False, url='https://deposit.softwareheritage.org',
           verbose=False, name=None, author=None):
    """Software Heritage Public Deposit Client

    Create/Update deposit through the command line.

More documentation can be found at
https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html.

    """
    url = _url(url)

    # The deposit itself must happen inside the ``with`` block: a generated
    # metadata file lives in temp_dir and is removed on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            logger.debug('Parsing cli options')
            config = client_command_parse_input(
                username, password, archive, metadata, archive_deposit,
                metadata_deposit, collection, slug, partial, deposit_id,
                replace, url, name, author, temp_dir)
        except InputError as e:
            logger.error('Problem during parsing options: %s', e)
            sys.exit(1)

        if verbose:
            # Never write the clear-text password to the logs
            loggable_config = dict(config, password='***')
            logger.info("Parsed configuration: %s", loggable_config)

        deposit_id = config['deposit_id']

        if deposit_id:
            r = deposit_update(config, logger)
        else:
            r = deposit_create(config, logger)

        logger.info(r)


@deposit.command()
@click.option('--url', default='https://deposit.softwareheritage.org',
              help="(Optional) Deposit server api endpoint. By default, "
                   "https://deposit.softwareheritage.org/1")
@click.option('--username', required=True,
              help="(Mandatory) User's name")
@click.option('--password', required=True,
              help="(Mandatory) User's associated password")
@click.option('--deposit-id', default=None,
              required=True,
              help="Deposit identifier.")
@click.pass_context
def status(ctx, url, username, password, deposit_id):
    """Deposit's status

    """
    url = _url(url)
    logger.debug('Status deposit')
    try:
        client = _client(url, username, password)
        collection = _collection(client)
    except InputError as e:
        logger.error('Problem during parsing options: %s', e)
        sys.exit(1)

    r = client.deposit_status(
        collection=collection, deposit_id=deposit_id)
    logger.info(r)


# ---------------------------------------------------------------------------
# swh/deposit/tests/cli/test_client.py
# ---------------------------------------------------------------------------
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import contextlib
import logging
import os
import re
from unittest.mock import MagicMock

from click.testing import CliRunner
import pytest

from swh.deposit.client import PublicApiDepositClient
from swh.deposit.cli.client import (
    generate_slug, _url, _client, _collection, InputError)
from swh.deposit.cli import deposit as cli

from ..conftest import TEST_USER


EXAMPLE_SERVICE_DOCUMENT = {
    'service': {
        'workspace': {
            'collection': {
                'sword:name': 'softcol',
            }
        }
    }
}


@pytest.fixture
def slug():
    return generate_slug()


@pytest.fixture
def client_mock(mocker, slug):
    """Mocked deposit client; also pins generate_slug() to the slug fixture
    """
    mocker.patch('swh.deposit.cli.client.generate_slug',
                 return_value=slug)
    mock_client = MagicMock()
    mocker.patch(
        'swh.deposit.cli.client._client',
        return_value=mock_client)
    mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT
    mock_client.deposit_create.return_value = '{"foo": "bar"}'
    return mock_client


def test_url():
    assert _url('http://deposit') == 'http://deposit/1'
    assert _url('https://other/1') == 'https://other/1'
    # trailing slashes must not produce a doubled separator or version
    assert _url('http://deposit/') == 'http://deposit/1'
    assert _url('https://other/1/') == 'https://other/1'


def test_client():
    client = _client('http://deposit', 'user', 'pass')
    assert isinstance(client, PublicApiDepositClient)


def test_collection_error():
    mock_client = MagicMock()
    mock_client.service_document.return_value = {
        'error': 'something went wrong'
    }

    with pytest.raises(InputError) as e:
        _collection(mock_client)

    assert 'Service document retrieval: something went wrong' == str(e.value)


def test_collection_ok():
    mock_client = MagicMock()
    mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT
    collection_name = _collection(mock_client)

    assert collection_name == 'softcol'


def test_single_minimal_deposit(
        sample_archive, mocker, caplog, client_mock, slug, tmp_path):
    """ from:
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa

    metadata_path = os.path.join(tmp_path, 'metadata.xml')
    mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory',
                 return_value=contextlib.nullcontext(str(tmp_path)))

    runner = CliRunner()
    result = runner.invoke(cli, [
        'upload',
        '--url', 'mock://deposit.swh/1',
        '--username', TEST_USER['username'],
        '--password', TEST_USER['password'],
        '--name', 'test-project',
        '--archive', sample_archive['path'],
        '--author', 'Jane Doe',
    ])

    assert result.exit_code == 0, result.output
    assert result.output == ''
    assert caplog.record_tuples == [
        ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'),
    ]

    client_mock.deposit_create.assert_called_once_with(
        archive=sample_archive['path'], collection='softcol',
        in_progress=False, metadata=metadata_path, slug=slug)

    # NOTE(review): the xml markup of this literal was missing from the
    # reviewed text; it is rebuilt from the dict serialized by
    # generate_metadata_file -- confirm against the repository.
    with open(metadata_path) as fd:
        assert fd.read() == f'''\
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" \
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
\t<codemeta:name>test-project</codemeta:name>
\t<codemeta:identifier>{slug}</codemeta:identifier>
\t<codemeta:author>
\t\t<codemeta:name>Jane Doe</codemeta:name>
\t</codemeta:author>
</entry>'''


def test_metadata_validation(
        sample_archive, mocker, caplog, tmp_path, client_mock):
    """Inconsistent metadata options are rejected before any deposit

    """
    metadata_path = os.path.join(tmp_path, 'metadata.xml')
    mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory',
                 return_value=contextlib.nullcontext(str(tmp_path)))
    with open(metadata_path, 'a'):
        pass  # creates the file

    runner = CliRunner()
    common_args = [
        'upload',
        '--url', 'mock://deposit.swh/1',
        '--username', TEST_USER['username'],
        '--password', TEST_USER['password'],
        '--archive', sample_archive['path'],
    ]

    def rejection_message(extra_args):
        """Run upload with extra_args, check it fails, return the error"""
        # Clear mocking state left by the previous invocation
        caplog.clear()
        client_mock.reset_mock()

        result = runner.invoke(cli, common_args + extra_args)

        assert result.exit_code == 1, result.output
        assert result.output == ''
        assert len(caplog.record_tuples) == 1
        (_logger, level, message) = caplog.record_tuples[0]
        assert level == logging.ERROR
        # the options are rejected before anything is sent to the server
        client_mock.deposit_create.assert_not_called()
        return message

    # Test missing author
    message = rejection_message(['--name', 'test-project'])
    assert ' --author ' in message

    # Test missing name
    message = rejection_message(['--author', 'Jane Doe'])
    assert ' --name ' in message

    # Test both --metadata and --author
    message = rejection_message(
        ['--metadata', metadata_path, '--author', 'Jane Doe'])
    assert re.search('--metadata.*is incompatible with', message)


def test_single_deposit_slug_generation(
        sample_archive, mocker, caplog, tmp_path, client_mock):
    """ from:
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    slug = 'my-slug'
    collection = 'my-collection'

    metadata_path = os.path.join(tmp_path, 'metadata.xml')
    mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory',
                 return_value=contextlib.nullcontext(str(tmp_path)))

    runner = CliRunner()
    result = runner.invoke(cli, [
        'upload',
        '--url', 'mock://deposit.swh/1',
        '--username', TEST_USER['username'],
        '--password', TEST_USER['password'],
        '--name', 'test-project',
        '--archive', sample_archive['path'],
        '--slug', slug,
        '--collection', collection,
        '--author', 'Jane Doe',
    ])

    assert result.exit_code == 0, result.output
    assert result.output == ''
    assert caplog.record_tuples == [
        ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'),
    ]

    client_mock.deposit_create.assert_called_once_with(
        archive=sample_archive['path'], collection=collection,
        in_progress=False, metadata=metadata_path, slug=slug)

    # NOTE(review): the xml markup of this literal was missing from the
    # reviewed text; it is rebuilt from the dict serialized by
    # generate_metadata_file -- confirm against the repository.
    with open(metadata_path) as fd:
        assert fd.read() == '''\
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" \
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
\t<codemeta:name>test-project</codemeta:name>
\t<codemeta:identifier>my-slug</codemeta:identifier>
\t<codemeta:author>
\t\t<codemeta:name>Jane Doe</codemeta:name>
\t</codemeta:author>
</entry>'''


def test_multisteps_deposit(
        sample_archive, atom_dataset, mocker, caplog, datadir,
        client_mock, slug):
    """ from:
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit
    """  # noqa
    # generate_slug() is already pinned to ``slug`` by the client_mock
    # fixture, no need to patch it again here.

    # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#create-an-incomplete-deposit
    client_mock.deposit_create.return_value = '{"deposit_id": "42"}'

    runner = CliRunner()
    result = runner.invoke(cli, [
        'upload',
        '--url', 'mock://deposit.swh/1',
        '--username', TEST_USER['username'],
        '--password', TEST_USER['password'],
        '--archive', sample_archive['path'],
        '--partial',
    ])

    assert result.exit_code == 0, result.output
    assert result.output == ''
    assert caplog.record_tuples == [
        ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'),
    ]

    client_mock.deposit_create.assert_called_once_with(
        archive=sample_archive['path'], collection='softcol',
        in_progress=True, metadata=None, slug=slug)

    # Clear mocking state
    caplog.clear()
    client_mock.reset_mock()

    # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit
    metadata_path = os.path.join(
        datadir, 'atom', 'entry-data-deposit-binary.xml')

    result = runner.invoke(cli, [
        'upload',
        '--url', 'mock://deposit.swh/1',
        '--username', TEST_USER['username'],
        '--password', TEST_USER['password'],
        '--metadata', metadata_path,
    ])

    assert result.exit_code == 0, result.output
    assert result.output == ''
    # reset_mock() keeps the configured return value, hence the same output
    assert caplog.record_tuples == [
        ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'),
    ]

    client_mock.deposit_create.assert_called_once_with(
        archive=None, collection='softcol',
        in_progress=False, metadata=metadata_path, slug=slug)

    # Clear mocking state
    caplog.clear()
    client_mock.reset_mock()