class DepositClient(SWHConfig):
    """Deposit client to:

    - read archive
    - read metadata
    - update deposit's status

    Every request goes through :meth:`do`, which delegates to the
    injected ``_client`` and adds basic http authentication when both
    'username' and 'password' are present in the configuration.

    """
    CONFIG_BASE_FILENAME = 'deposit/client'
    DEFAULT_CONFIG = {}

    def __init__(self, config=None, _client=requests):
        """Initialize the client.

        Args:
            config (dict/None): configuration to use as is. When None,
              the parent initializer is called instead (it is expected
              to set self.config -- confirm against SWHConfig).
            _client: object exposing the http verbs as callables
              (get, put, ...). Defaults to the requests module; tests
              inject a fake.

        """
        if config is not None:
            self.config = config
        else:
            super().__init__()

        self._client = _client

        if 'username' in self.config and 'password' in self.config:
            self.auth = (self.config['username'], self.config['password'])
        else:
            self.auth = None

    def do(self, method, *args, **kwargs):
        """Internal method to deal with requests, possibly with basic http
           authentication.

        Args:
            method (str): name of a callable exposed by the underlying
              client (e.g. 'get', 'put')
            *args, **kwargs: passed as is to that callable

        Returns:
            The request's execution

        Raises:
            ValueError if the underlying client exposes no callable
            named `method`

        """
        # A plain hasattr check would also let non-callable attributes
        # through (e.g. requests.codes) and fail later with an unrelated
        # TypeError; require an actual callable.
        method_fn = getattr(self._client, method, None)
        if not callable(method_fn):
            raise ValueError('Development error, unsupported method %s' % (
                method))

        if self.auth:
            kwargs['auth'] = self.auth

        return method_fn(*args, **kwargs)

    def archive_get(self, archive_update_url, archive_path, log=None):
        """Retrieve the archive from the deposit to a local directory.

        Args:
            archive_update_url (str): The full deposit archive(s)'s raw
              content to retrieve locally
            archive_path (str): the local archive's path where to store
              the raw content
            log (logger/None): when provided, the failure is logged as
              an error before raising

        Returns:
            The archive path to the local archive to load.

        Raises:
            ValueError if the response is not ok

        """
        r = self.do('get', archive_update_url, stream=True)
        if r.ok:
            with open(archive_path, 'wb') as f:
                # NOTE(review): requests' iter_content defaults to
                # chunk_size=1; consider passing a larger chunk_size
                # once the test doubles accept that argument.
                for chunk in r.iter_content():
                    f.write(chunk)

            return archive_path

        msg = 'Problem when retrieving deposit archive at %s' % (
            archive_update_url, )
        if log:
            log.error(msg)

        raise ValueError(msg)

    def metadata_get(self, metadata_url, log=None):
        """Retrieve the metadata information on a given deposit.

        Args:
            metadata_url (str): The full deposit metadata url to retrieve
              locally
            log (logger/None): when provided, the failure is logged as
              an error before raising

        Returns:
            The decoded json body of the response (the metadata for
            that deposit).

        Raises:
            ValueError if the response is not ok

        """
        r = self.do('get', metadata_url)
        if r.ok:
            return r.json()

        msg = 'Problem when retrieving metadata at %s' % metadata_url
        if log:
            log.error(msg)

        raise ValueError(msg)

    def status_update(self, update_status_url, status, revision_id=None):
        """Update the deposit's status.

        Args:
            update_status_url (str): the full deposit's archive
            status (str): The status to update the deposit with
            revision_id (str/None): the revision's identifier to update to

        """
        payload = {'status': status}
        if revision_id:
            payload['revision_id'] = revision_id

        # The response is not inspected; a failed update goes unnoticed.
        self.do('put', update_status_url, json=payload)
class StreamedResponse:
    """Streamed response facsimile

    Exposes the subset of a streamed http response the deposit client
    reads: the `ok` flag and `iter_content`.

    """
    def __init__(self, ok, stream):
        """
        Args:
            ok (bool): whether the faked request succeeded
            stream (iterable/None): the chunks to serve, in order

        """
        self.ok = ok
        self.stream = stream

    def iter_content(self, chunk_size=1, decode_unicode=False):
        """Yield the chunks of the stream, in order.

        Both parameters are accepted so callers may use the same
        signature as with a real response; they are ignored, chunks are
        served as provided at construction time.

        """
        # A failed response is built with stream=None: serve nothing
        # instead of raising on iteration.
        yield from self.stream or ()


class FakeRequestClientGet:
    """Fake request client dedicated to get method calls.

    It records the arguments of the last `get` call in `args` and
    `kwargs` (None until the first call) and always answers with the
    response provided at construction time.

    """
    def __init__(self, response):
        self.response = response
        # Defined up front so they can be inspected before any call.
        self.args = None
        self.kwargs = None

    def get(self, *args, **kwargs):
        """Record the call's arguments and return the canned response."""
        self.args = args
        self.kwargs = kwargs
        return self.response
@attr('fs')
class DepositClientReadArchiveTest(unittest.TestCase):
    """Check DepositClient.archive_get against a fake http client.

    Each test writes into a temporary directory created in setUp and
    removed in tearDown.

    """
    def setUp(self):
        super().setUp()
        # Use the platform's default temporary location rather than a
        # hard-coded '/tmp'.
        self.temporary_directory = tempfile.mkdtemp()

    def tearDown(self):
        # The parent's tearDown must run here (not setUp again).
        super().tearDown()
        shutil.rmtree(self.temporary_directory)

    def _fetch_archive(self, config):
        """Retrieve an archive through a DepositClient built with `config`
           and check the content written on disk.

        Args:
            config (dict): the deposit client's configuration

        Returns:
            The fake request client, so callers can check the
            arguments the `get` call received.

        """
        stream_content = [b"some", b"streamed", b"response"]
        response = StreamedResponse(
            ok=True,
            stream=(s for s in stream_content))
        _client = FakeRequestClientGet(response)

        deposit_client = DepositClient(config=config, _client=_client)

        archive_path = os.path.join(self.temporary_directory, 'test.archive')
        archive_path = deposit_client.archive_get(
            'http://nowhere:9000/some/url', archive_path)

        self.assertTrue(os.path.exists(archive_path))

        with open(archive_path, 'rb') as f:
            actual_content = f.read()

        self.assertEqual(actual_content, b''.join(stream_content))
        self.assertEqual(_client.args,
                         ('http://nowhere:9000/some/url', ))
        return _client

    @istest
    def archive_get(self):
        """Reading archive should write data in temporary directory

        """
        _client = self._fetch_archive(config={})

        self.assertEqual(_client.kwargs, {
            'stream': True
        })

    @istest
    def archive_get_with_authentication(self):
        """Reading archive with credentials configured should write data in
           temporary directory and send the basic authentication

        """
        _client = self._fetch_archive(config={
            'username': 'user',
            'password': 'pass'
        })

        self.assertEqual(_client.kwargs, {
            'stream': True,
            'auth': ('user', 'pass')
        })

    @istest
    def archive_get_can_fail(self):
        """Reading archive can fail for some reasons

        """
        response = StreamedResponse(ok=False, stream=None)
        _client = FakeRequestClientGet(response)
        deposit_client = DepositClient(config={}, _client=_client)

        url = 'http://nowhere:9001/some/url'
        with self.assertRaisesRegex(
                ValueError,
                'Problem when retrieving deposit archive at %s' % url):
            deposit_client.archive_get(url, 'some/path')
class JsonResponse:
    """Json response facsimile

    Exposes the subset of an http response the deposit client reads
    when retrieving metadata: the `ok` flag and `json`.

    """
    def __init__(self, ok, response):
        self.ok = ok
        self.response = response

    def json(self):
        """Return the payload provided at construction time."""
        return self.response


class DepositClientReadMetadataTest(unittest.TestCase):
    """Check DepositClient.metadata_get against a fake http client."""

    @istest
    def metadata_get(self):
        """Reading metadata should return the response's json content

        """
        expected_response = {"some": "dict"}

        response = JsonResponse(
            ok=True,
            response=expected_response)
        _client = FakeRequestClientGet(response)
        deposit_client = DepositClient(config={}, _client=_client)

        actual_metadata = deposit_client.metadata_get(
            'http://nowhere:9000/metadata')

        self.assertEqual(actual_metadata, expected_response)
        # the url must be transmitted as is, without authentication
        self.assertEqual(_client.args,
                         ('http://nowhere:9000/metadata', ))
        self.assertEqual(_client.kwargs, {})

    @istest
    def metadata_get_can_fail(self):
        """Reading metadata can fail for some reasons

        """
        _client = FakeRequestClientGet(JsonResponse(ok=False, response=None))
        deposit_client = DepositClient(config={}, _client=_client)
        url = 'http://nowhere:9001/some/metadata'

        with self.assertRaisesRegex(
                ValueError,
                'Problem when retrieving metadata at %s' % url):
            deposit_client.metadata_get(url)
class FakeRequestClientPut:
    """Fake Request client dedicated to put request method calls.

    It records the arguments of the last `put` call in `args` and
    `kwargs` (None until the first call) and answers with the optional
    response provided at construction time (None by default).

    """
    args = None
    kwargs = None

    def __init__(self, response=None):
        """
        Args:
            response: what `put` returns; None when not provided

        """
        self.response = response

    def put(self, *args, **kwargs):
        """Record the call's arguments and return the canned response."""
        self.args = args
        self.kwargs = kwargs
        return self.response
class DepositClientStatusUpdateTest(unittest.TestCase):
    """Check DepositClient.status_update against a fake http client."""

    @istest
    def status_update(self):
        """Update status

        """
        _client = FakeRequestClientPut()
        deposit_client = DepositClient(config={}, _client=_client)

        deposit_client.status_update('http://nowhere:9000/update/status',
                                     'success', revision_id='some-revision-id')

        self.assertEqual(_client.args,
                         ('http://nowhere:9000/update/status', ))
        self.assertEqual(_client.kwargs, {
            'json': {
                'status': 'success',
                'revision_id': 'some-revision-id',
            }
        })

    @istest
    def status_update_with_no_revision_id(self):
        """Update status without revision id sends only the status

        """
        _client = FakeRequestClientPut()
        deposit_client = DepositClient(config={}, _client=_client)

        deposit_client.status_update('http://nowhere:9001/update/status',
                                     'failure')

        self.assertEqual(_client.args,
                         ('http://nowhere:9001/update/status', ))
        self.assertEqual(_client.kwargs, {
            'json': {
                'status': 'failure',
            }
        })
TOOL_ID = 99
PROVIDER_ID = 12


class DepositLoaderInhibitsStorage:
    """Mixin class to inhibit the persistence and keep in memory the data
    sent for storage.

    The data is kept in `self.state`, a dict of lists indexed by object
    type. The last status passed to `update_origin_visit` is kept in
    `self.status` (None until then).

    cf. SWHDepositLoaderNoStorage

    """
    def __init__(self, client=None):
        # client is not used here, transit it nonetheless to other mixins
        super().__init__(client=client)
        # typed data
        self.state = {
            'origin': [],
            'origin_visit': [],
            'origin_metadata': [],
            'content': [],
            'directory': [],
            'revision': [],
            'release': [],
            'occurrence': [],
            'tool': [],
            'provider': []
        }
        # Defined up front so it can be read even when no visit update
        # took place.
        self.status = None

    def _add(self, type, l):
        """Add without duplicates and keeping the insertion order.

        Args:
            type (str): Type of objects concerned by the action
            l ([object]): List of 'type' object

        """
        col = self.state[type]
        for o in l:
            if o in col:
                continue
            col.append(o)

    def send_origin(self, origin):
        """Keep the origin in memory and return its (fixed) identifier.

        The origin dict received is updated in place with the key 'id'.

        """
        origin.update({'id': 1})
        self._add('origin', [origin])
        return origin['id']

    def send_origin_visit(self, origin_id, visit_date):
        """Keep the origin visit in memory and return it (visit is
        always 1).

        """
        origin_visit = {
            'origin': origin_id,
            'visit_date': visit_date,
            'visit': 1,
        }
        self._add('origin_visit', [origin_visit])
        return origin_visit

    def send_origin_metadata(self, origin_id, visit_date, provider_id,
                             tool_id, metadata):
        """Keep the origin metadata in memory and return it."""
        origin_metadata = {
            'origin_id': origin_id,
            'visit_date': visit_date,
            'provider_id': provider_id,
            'tool_id': tool_id,
            'metadata': metadata
        }
        self._add('origin_metadata', [origin_metadata])
        return origin_metadata

    def send_tool(self, tool):
        """Keep the tool's name, version and configuration in memory and
        return the fixed TOOL_ID.

        """
        tool = {
            'tool_name': tool['tool_name'],
            'tool_version': tool['tool_version'],
            'tool_configuration': tool['tool_configuration']
        }
        self._add('tool', [tool])
        tool_id = TOOL_ID
        return tool_id

    def send_provider(self, provider):
        """Keep the provider's name, type, url and metadata in memory and
        return the fixed PROVIDER_ID.

        """
        provider = {
            'provider_name': provider['provider_name'],
            'provider_type': provider['provider_type'],
            'provider_url': provider['provider_url'],
            'metadata': provider['metadata']
        }
        self._add('provider', [provider])
        provider_id = PROVIDER_ID
        return provider_id

    def maybe_load_contents(self, contents):
        """Keep the contents in memory."""
        self._add('content', contents)

    def maybe_load_directories(self, directories):
        """Keep the directories in memory."""
        self._add('directory', directories)

    def maybe_load_revisions(self, revisions):
        """Keep the revisions in memory."""
        self._add('revision', revisions)

    def maybe_load_releases(self, releases):
        """Keep the releases in memory."""
        self._add('release', releases)

    def maybe_load_occurrences(self, occurrences):
        """Keep the occurrences in memory."""
        self._add('occurrence', occurrences)

    # Fetch history is not tracked: the following overrides do nothing
    def open_fetch_history(self):
        pass

    def close_fetch_history_failure(self, fetch_history_id):
        pass

    def close_fetch_history_success(self, fetch_history_id):
        pass

    def update_origin_visit(self, origin_id, visit, status):
        """Only remember the status the visit was updated with."""
        self.status = status

    # Override to do nothing at the end
    def close_failure(self):
        pass

    def close_success(self):
        pass
class TestLoaderUtils(unittest.TestCase):
    """Assertion helpers shared by the loader tests."""

    def assertRevisionsOk(self, expected_revisions):
        """Check the loader's revisions match the expected revisions.

        Expects self.loader to be instantiated and ready to be
        inspected (meaning the loading took place).

        Args:
            expected_revisions (dict): Dict with key revision id,
            value the targeted directory id.

        """
        # The last revision being the one used later to start back from
        for rev in self.loader.state['revision']:
            rev_id = hashutil.hash_to_hex(rev['id'])
            directory_id = hashutil.hash_to_hex(rev['directory'])

            self.assertEqual(expected_revisions[rev_id], directory_id)


class SWHDepositLoaderNoStorage(DepositLoaderInhibitsStorage, DepositLoader):
    """Loader to test.

    It inherits from the actual deposit loader to actually test its
    correct behavior. It also inherits from
    DepositLoaderInhibitsStorage so that no persistence takes place.

    """
    pass


class SWHDepositTestClient(DepositClient):
    """Deposit client which reads and updates the deposit through the
    test client it is given instead of issuing real http requests.

    """
    def __init__(self, client, config):
        """
        Args:
            client: the test client used for every get/put call
            config (dict): the deposit client's configuration

        """
        super().__init__(config=config)
        self.client = client

    def archive_get(self, archive_update_url, archive_path, log=None):
        """Write the response's streaming content to archive_path and
        return that path. `log` is ignored.

        """
        r = self.client.get(archive_update_url)
        with open(archive_path, 'wb') as f:
            for chunk in r.streaming_content:
                f.write(chunk)

        return archive_path

    def metadata_get(self, metadata_url, log=None):
        """Return the response's content decoded as utf-8 json. `log` is
        ignored.

        """
        r = self.client.get(metadata_url)
        return json.loads(r.content.decode('utf-8'))

    def status_update(self, update_status_url, status, revision_id=None):
        """Send the status (and the revision id when provided) as a json
        put request.

        """
        payload = {'status': status}
        if revision_id:
            payload['revision_id'] = revision_id
        self.client.put(update_status_url,
                        content_type='application/json',
                        data=json.dumps(payload))
@attr('fs')
class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase,
                                BasicTestCase, CommonCreationRoutine,
                                FileSystemCreationRoutine, TestLoaderUtils):
    """Load deposits with a loader which keeps in memory what it would
    have sent for storage, then inspect that state.

    """
    def setUp(self):
        super().setUp()

        # create the extraction dir used by the loader
        os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True)

        self.server = 'http://localhost/'

        # 1. create a deposit with archive and metadata
        self.deposit_id = self.create_simple_binary_deposit()
        # 2. Sets a basic client which accesses the test data
        loader_client = SWHDepositTestClient(self.client, config={})
        # 3. setup loader with no persistence and that client
        self.loader = SWHDepositLoaderNoStorage(client=loader_client)

    def tearDown(self):
        super().tearDown()
        shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir'])

    def _private_urls(self, deposit_id):
        """Build the private urls the loader needs for a given deposit.

        Args:
            deposit_id: identifier of the deposit to load

        Returns:
            dict of keyword arguments suitable for self.loader.load

        """
        args = [self.collection.name, deposit_id]
        return {
            'archive_url': reverse(PRIVATE_GET_RAW_CONTENT, args=args),
            'deposit_meta_url': reverse(PRIVATE_GET_DEPOSIT_METADATA,
                                        args=args),
            'deposit_update_url': reverse(PRIVATE_PUT_DEPOSIT, args=args),
        }

    @istest
    def inject_deposit_ready(self):
        """Load a deposit which is ready

        """
        # when
        self.loader.load(**self._private_urls(self.deposit_id))

        # then
        self.assertEqual(len(self.loader.state['content']), 1)
        self.assertEqual(len(self.loader.state['directory']), 1)
        self.assertEqual(len(self.loader.state['revision']), 1)
        self.assertEqual(len(self.loader.state['release']), 0)
        self.assertEqual(len(self.loader.state['occurrence']), 1)

        # FIXME enrich state introspection (check the loaded revisions
        # through self.assertRevisionsOk)

    @istest
    def inject_deposit_verify_metadata(self):
        """Load a deposit with metadata, test metadata integrity

        """
        self.deposit_metadata_id = self.add_metadata_to_deposit(
            self.deposit_id)

        # when
        self.loader.load(**self._private_urls(self.deposit_metadata_id))

        # then
        self.assertEqual(len(self.loader.state['content']), 1)
        self.assertEqual(len(self.loader.state['directory']), 1)
        self.assertEqual(len(self.loader.state['revision']), 1)
        self.assertEqual(len(self.loader.state['release']), 0)
        self.assertEqual(len(self.loader.state['occurrence']), 1)
        self.assertEqual(len(self.loader.state['origin_metadata']), 1)
        self.assertEqual(len(self.loader.state['tool']), 1)
        self.assertEqual(len(self.loader.state['provider']), 1)

        atom = '{http://www.w3.org/2005/Atom}'
        codemeta = '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}'
        expected_origin_metadata = {
            atom + 'author': {
                atom + 'email': 'hal@ccsd.cnrs.fr',
                atom + 'name': 'HAL'
            },
            codemeta + 'url':
                'https://hal-test.archives-ouvertes.fr/hal-01243065',
            codemeta + 'runtimePlatform': 'phpstorm',
            codemeta + 'license': {
                codemeta + 'name':
                    'CeCILL Free Software License Agreement v1.1'
            },
            codemeta + 'programmingLanguage': 'C',
            codemeta + 'applicationCategory': 'test',
            codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00',
            codemeta + 'version': 1,
            atom + 'external_identifier': 'hal-01243065',
            atom + 'title': 'Composing a Web of Audio Applications',
            codemeta + 'description': 'this is the description',
            atom + 'id': 'hal-01243065',
            atom + 'client': 'hal',
            codemeta + 'keywords': 'DSP programming,Web',
            codemeta + 'developmentStatus': 'stable'
        }
        result = self.loader.state['origin_metadata'][0]
        self.assertEqual(result['metadata'], expected_origin_metadata)
        self.assertEqual(result['tool_id'], TOOL_ID)
        self.assertEqual(result['provider_id'], PROVIDER_ID)