Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/deposit/injection/checker.py b/swh/deposit/injection/checker.py
index 7f867d5e..34ffd018 100644
--- a/swh/deposit/injection/checker.py
+++ b/swh/deposit/injection/checker.py
@@ -1,23 +1,20 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from .client import DepositClient
class DepositChecker():
"""Deposit checker implementation.
Trigger deposit's checks through the private api.
"""
def __init__(self, client=None):
super().__init__()
- if client:
- self.client = client
- else:
- self.client = DepositClient()
+ self.client = client if client else DepositClient()
def check(self, deposit_check_url):
- self.client.get(deposit_check_url)
+ return self.client.check(deposit_check_url)
diff --git a/swh/deposit/injection/client.py b/swh/deposit/injection/client.py
index 73b0fe91..0d1b4ce2 100644
--- a/swh/deposit/injection/client.py
+++ b/swh/deposit/injection/client.py
@@ -1,126 +1,150 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module in charge of defining a swh-deposit client
"""
import requests
from swh.core.config import SWHConfig
class DepositClient(SWHConfig):
"""Deposit client to:
- - read archive
- - read metadata
- - update deposit's status
+ - read a given deposit's archive(s)
+ - read a given deposit's metadata
+ - update a given deposit's status
"""
CONFIG_BASE_FILENAME = 'deposit/client'
- DEFAULT_CONFIG = {}
+ DEFAULT_CONFIG = {
+ 'url': ('str', 'http://localhost:8000'),
+ 'auth': ('dict', {}) # with optional 'username'/'password' keys
+ }
def __init__(self, config=None, _client=requests):
- if config is not None:
- self.config = config
+ super().__init__()
+ if config is None:
+ self.config = super().parse_config_file()
else:
- super().__init__()
+ self.config = config
self._client = _client
-
- if 'username' in self.config and 'password' in self.config:
- self.auth = (self.config['username'], self.config['password'])
- else:
+ self.base_url = self.config['url']
+ auth = self.config['auth']
+ if auth == {}:
self.auth = None
+ else:
+ self.auth = (auth['username'], auth['password'])
- def do(self, method, *args, **kwargs):
+ def do(self, method, url, *args, **kwargs):
"""Internal method to deal with requests, possibly with basic http
authentication.
Args:
method (str): supported http methods as in self._methods' keys
Returns:
The request's execution
"""
if hasattr(self._client, method):
method_fn = getattr(self._client, method)
else:
raise ValueError('Development error, unsupported method %s' % (
method))
if self.auth:
kwargs['auth'] = self.auth
- return method_fn(*args, **kwargs)
+ full_url = '%s%s' % (self.base_url.rstrip('/'), url)
+ return method_fn(full_url, *args, **kwargs)
def archive_get(self, archive_update_url, archive_path, log=None):
    """Retrieve the archive from the deposit to a local file.

    Args:
        archive_update_url (str): url of the deposit archive(s)' raw
            content, handed as-is to ``self.do``
        archive_path (str): the local archive's path where to store
            the raw content
        log: optional logger; when given, the failure message is sent
            to ``log.error`` before the exception is raised

    Returns:
        The archive path to the local archive to load.

    Raises:
        ValueError: if the response is not ok

    """
    r = self.do('get', archive_update_url, stream=True)
    if r.ok:
        with open(archive_path, 'wb') as f:
            for chunk in r.iter_content():
                f.write(chunk)

        return archive_path

    msg = 'Problem when retrieving deposit archive at %s' % (
        archive_update_url, )
    if log:
        log.error(msg)

    raise ValueError(msg)
def metadata_get(self, metadata_url, log=None):
    """Retrieve the metadata information on a given deposit.

    Args:
        metadata_url (str): url of the deposit metadata to retrieve,
            handed as-is to ``self.do``
        log: optional logger; when given, the failure message is sent
            to ``log.error`` before the exception is raised

    Returns:
        The decoded json body of the response (``r.json()``).

    Raises:
        ValueError: if the response is not ok

    """
    r = self.do('get', metadata_url)
    if r.ok:
        return r.json()

    msg = 'Problem when retrieving metadata at %s' % metadata_url
    if log:
        log.error(msg)

    raise ValueError(msg)
def status_update(self, update_status_url, status,
                  revision_id=None):
    """Update the deposit's status.

    Sends a ``put`` request through ``self.do`` with a json payload
    holding the status and, when provided, the revision identifier.

    Args:
        update_status_url (str): url used to update the deposit's status
        status (str): The status to update the deposit with
        revision_id (str/None): the revision's identifier to update to;
            left out of the payload when falsy

    """
    payload = {'status': status}
    if revision_id:
        payload['revision_id'] = revision_id

    self.do('put', update_status_url, json=payload)
+
+ def check(self, check_url, log=None):
+ """Check the deposit's associated data (metadata, archive(s))
+
+ Args:
+ check_url (str): the full deposit's check url
+
+ """
+ r = self.do('get', check_url)
+ if r.ok:
+ data = r.json()
+ return data['status']
+
+ msg = 'Problem when checking deposit %s' % check_url
+ if log:
+ log.error(msg)
+
+ raise ValueError(msg)
diff --git a/swh/deposit/injection/loader.py b/swh/deposit/injection/loader.py
index a9c973cb..cedbbf5f 100644
--- a/swh/deposit/injection/loader.py
+++ b/swh/deposit/injection/loader.py
@@ -1,125 +1,122 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import os
import tempfile
from swh.model import hashutil
from swh.loader.tar import loader
from swh.loader.core.loader import SWHLoader
from .client import DepositClient
class DepositLoader(loader.TarLoader):
"""Deposit loader implementation.
This is a subclass of the :class:TarLoader as the main goal of
this class is to first retrieve the deposit's tarball contents as
one and its associated metadata. Then provide said tarball to be
loaded by the TarLoader.
This will:
- retrieves the deposit's archive locally
- provide the archive to be loaded by the tar loader
- clean up the temporary location used to retrieve the archive locally
- update the deposit's status accordingly
"""
def __init__(self, client=None):
super().__init__()
- if client:
- self.client = client
- else:
- self.client = DepositClient()
+ self.client = client if client else DepositClient()
def load(self, *, archive_url, deposit_meta_url, deposit_update_url):
    """Load a deposit, delegating explicitly to ``SWHLoader.load``.

    The call targets ``SWHLoader.load`` directly rather than
    ``super().load``, forwarding the three keyword-only urls unchanged.
    NOTE(review): presumably this skips the TarLoader's own ``load``
    signature -- confirm against swh.loader.tar.

    Args:
        archive_url (str): url of the deposit's raw archive content
        deposit_meta_url (str): url of the deposit's metadata
        deposit_update_url (str): url used to update the deposit's status

    """
    SWHLoader.load(
        self,
        archive_url=archive_url,
        deposit_meta_url=deposit_meta_url,
        deposit_update_url=deposit_update_url)
def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url):
    """Prepare the injection by first retrieving the deposit's raw archive
       content.

    Steps, in order:
    - keep ``deposit_update_url`` and a fresh temporary directory on the
      instance (used later by ``post_load`` and ``cleanup``)
    - download the archive into ``<tmpdir>/archive.zip``
    - fetch the deposit's metadata and keep its ``origin_metadata``
    - call ``self.prepare_metadata()``
    - flag the deposit as 'injecting'
    - hand the archive and metadata over to the parent's ``prepare``

    Args:
        archive_url (str): url of the deposit's raw archive content
        deposit_meta_url (str): url of the deposit's metadata
        deposit_update_url (str): url used to update the deposit's status

    """
    self.deposit_update_url = deposit_update_url
    temporary_directory = tempfile.TemporaryDirectory()
    self.temporary_directory = temporary_directory
    archive_path = os.path.join(temporary_directory.name, 'archive.zip')
    archive = self.client.archive_get(
        archive_url, archive_path, log=self.log)

    metadata = self.client.metadata_get(
        deposit_meta_url, log=self.log)
    origin = metadata['origin']
    visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
    revision = metadata['revision']
    occurrence = metadata['occurrence']
    self.origin_metadata = metadata['origin_metadata']
    self.prepare_metadata()

    self.client.status_update(deposit_update_url, 'injecting')

    super().prepare(tar_path=archive,
                    origin=origin,
                    visit_date=visit_date,
                    revision=revision,
                    occurrences=[occurrence])
def store_metadata(self):
    """Store the origin_metadata during the load process.

    Reads the provider and tool identifiers out of
    ``self.origin_metadata`` and sends them, with the metadata itself,
    through ``self.send_origin_metadata``.

    The original note states that provider_id and tool_id are resolved
    during the prepare() method.

    Raises:
        Exception: any error raised while sending is logged, then
            re-raised unchanged

    """
    origin_id = self.origin_id
    visit_date = self.visit_date
    provider_id = self.origin_metadata['provider']['provider_id']
    tool_id = self.origin_metadata['tool']['tool_id']
    metadata = self.origin_metadata['metadata']
    try:
        self.send_origin_metadata(origin_id, visit_date, provider_id,
                                  tool_id, metadata)
    except Exception:
        # Exception rather than a bare except: interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) pass through without logging
        self.log.exception('Problem when storing origin_metadata')
        raise
def post_load(self, success=True):
    """Updating the deposit's status according to its loading status.

    If not successful, we update its status to 'failure'.
    Otherwise, we update its status to 'success' and pass along
    its associated revision.

    This is best-effort: any error raised along the way (including
    the loaded objects not holding exactly one revision) is logged and
    not propagated.

    Args:
        success (bool): whether the loading succeeded

    """
    try:
        if not success:
            self.client.status_update(self.deposit_update_url,
                                      status='failure')
            return
        # first retrieve the new revision; the unpacking requires
        # exactly one revision to have been loaded
        [rev_id] = self.objects['revision'].keys()
        if rev_id:
            rev_id_hex = hashutil.hash_to_hex(rev_id)
            # then update the deposit's status to success with its
            # revision-id
            self.client.status_update(self.deposit_update_url,
                                      status='success',
                                      revision_id=rev_id_hex)
    except Exception:
        # Exception rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are no longer swallowed here
        self.log.exception(
            'Problem when trying to update the deposit\'s status')
def cleanup(self):
    """Clean up temporary directory where we retrieved the tarball.

    Runs the parent's cleanup first, then removes the temporary
    directory stored on the instance as ``self.temporary_directory``.

    """
    super().cleanup()
    self.temporary_directory.cleanup()
diff --git a/swh/deposit/injection/scheduler.py b/swh/deposit/injection/scheduler.py
index c7479f3a..c6de590a 100644
--- a/swh/deposit/injection/scheduler.py
+++ b/swh/deposit/injection/scheduler.py
@@ -1,205 +1,203 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module in charge of sending deposit injection as celery task or
scheduled one-shot tasks.
"""
import click
import logging
from abc import ABCMeta, abstractmethod
from celery import group
from swh.core import utils
from swh.core.config import SWHConfig
from swh.deposit.config import setup_django_for, DEPOSIT_STATUS_READY
class SWHScheduling(SWHConfig, metaclass=ABCMeta):
    """Base swh scheduling class to aggregate the schedule deposit
       injection.

    Subclasses must define an ``ADDITIONAL_CONFIG`` class attribute (it
    is read in ``__init__``) and implement ``schedule``.

    """
    CONFIG_BASE_FILENAME = 'deposit/server'

    DEFAULT_CONFIG = {
        'dry_run': ('bool', False),
    }

    def __init__(self):
        super().__init__()
        self.config = self.parse_config_file(
            additional_configs=[self.ADDITIONAL_CONFIG])
        self.log = logging.getLogger('swh.deposit.scheduling')

    @abstractmethod
    def schedule(self, deposits):
        """Schedule the new deposit injection.

        Args:
            deposits: Deposit aggregated data

        Returns:
            None

        """
        pass
class SWHCeleryScheduling(SWHScheduling):
    """Deposit injection as Celery task scheduling.

    """
    ADDITIONAL_CONFIG = {
        'task_name': (
            'str', 'swh.deposit.injection.tasks.LoadDepositArchiveTsk'),
    }

    def __init__(self, config=None):
        """
        Args:
            config (dict/None): optional values overriding the parsed
                configuration (e.g. 'dry_run', 'task_name')

        """
        super().__init__()
        # Aliased so the module-level ``utils`` is not shadowed locally
        from swh.scheduler import utils as scheduler_utils
        # Apply the overrides before reading any setting, so that an
        # overridden 'task_name' is honored like 'dry_run' is
        if config:
            self.config.update(**config)
        self.dry_run = self.config['dry_run']
        self.task_name = self.config['task_name']
        self.task = scheduler_utils.get_task(self.task_name)

    def _convert(self, deposits):
        """Convert tuple to celery task signature.

        Args:
            deposits: iterable of (archive_url, deposit_meta_url,
                deposit_update_url) tuples

        Yields:
            One task signature (``task.s(...)``) per tuple

        """
        task = self.task
        for archive_url, deposit_meta_url, deposit_update_url in deposits:
            yield task.s(archive_url=archive_url,
                         deposit_meta_url=deposit_meta_url,
                         deposit_update_url=deposit_update_url)

    def schedule(self, deposits):
        """Schedule the new deposit injection directly through celery.

        Args:
            deposits: iterable of (archive_url, deposit_meta_url,
                deposit_update_url) tuples

        Returns:
            None in dry-run mode, otherwise the result of the celery
            group's ``delay()`` call.

        """
        if self.dry_run:
            return

        return group(self._convert(deposits)).delay()
class SWHScheduling(SWHScheduling):
    """Deposit injection through SWH's task scheduling interface.

    NOTE(review): this class reuses the name of its base class; from
    this definition on, the module-level name ``SWHScheduling`` refers
    to this subclass, not to the abstract base.

    """
    ADDITIONAL_CONFIG = {}

    def __init__(self, config=None):
        """
        Args:
            config (dict/None): optional values overriding the parsed
                configuration

        """
        super().__init__()
        from swh.scheduler.backend import SchedulerBackend
        if config:
            self.config.update(**config)
        self.dry_run = self.config['dry_run']
        # The whole configuration is passed as keyword arguments
        self.scheduler = SchedulerBackend(**self.config)

    def _convert(self, deposits):
        """Convert tuple to one-shot scheduling tasks.

        Args:
            deposits: iterable of (archive_url, deposit_meta_url,
                deposit_update_url) tuples

        Yields:
            One 'oneshot' task dict of type
            'swh-deposit-archive-ingestion' per tuple, with 'next_run'
            set to the current UTC time.

        """
        import datetime
        for archive_url, deposit_meta_url, deposit_update_url in deposits:
            yield {
                'policy': 'oneshot',
                'type': 'swh-deposit-archive-ingestion',
                'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
                'arguments': {
                    'args': [],
                    'kwargs': {
                        'archive_url': archive_url,
                        'deposit_meta_url': deposit_meta_url,
                        'deposit_update_url': deposit_update_url,
                    },
                }
            }

    def schedule(self, deposits):
        """Schedule the new deposit injection through swh.scheduler's api.

        Does nothing in dry-run mode.

        Args:
            deposits: iterable of (archive_url, deposit_meta_url,
                deposit_update_url) tuples

        """
        if self.dry_run:
            return

        self.scheduler.create_tasks(self._convert(deposits))
def get_deposit_ready():
    """Retrieve deposit ready to be task executed.

    Yields:
        The Deposit objects whose status is DEPOSIT_STATUS_READY.

    """
    # Imported at call time; presumably django has to be set up before
    # the models can be imported -- TODO confirm
    from swh.deposit.models import Deposit
    yield from Deposit.objects.filter(status=DEPOSIT_STATUS_READY)
-def prepare_task_arguments(server):
+def prepare_task_arguments():
"""Convert deposit to argument for task to be executed.
"""
from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
from swh.deposit.config import PRIVATE_PUT_DEPOSIT
from django.core.urlresolvers import reverse
for deposit in get_deposit_ready():
args = [deposit.collection.name, deposit.id]
- archive_url = '%s%s' % (server, reverse(
- PRIVATE_GET_RAW_CONTENT, args=args))
- deposit_meta_url = '%s%s' % (server, reverse(
- PRIVATE_GET_DEPOSIT_METADATA, args=args))
- deposit_update_url = '%s%s' % (server, reverse(
- PRIVATE_PUT_DEPOSIT, args=args))
+ archive_url = reverse(
+ PRIVATE_GET_RAW_CONTENT, args=args)
+ deposit_meta_url = reverse(
+ PRIVATE_GET_DEPOSIT_METADATA, args=args)
+ deposit_update_url = reverse(
+ PRIVATE_PUT_DEPOSIT, args=args)
yield archive_url, deposit_meta_url, deposit_update_url
@click.command(
help='Schedule one-shot deposit injections')
@click.option('--platform', default='development',
help='development or production platform')
@click.option('--scheduling-method', default='celery',
help='Scheduling method')
-@click.option('--server', default='http://127.0.0.1:5006',
- help='Deposit server')
@click.option('--batch-size', default=1000, type=click.INT,
help='Task batch size')
@click.option('--dry-run/--no-dry-run', is_flag=True, default=False,
help='Dry run')
-def main(platform, scheduling_method, server, batch_size, dry_run):
+def main(platform, scheduling_method, batch_size, dry_run):
setup_django_for(platform)
override_config = {}
if dry_run:
override_config['dry_run'] = dry_run
if scheduling_method == 'celery':
scheduling = SWHCeleryScheduling(override_config)
elif scheduling_method == 'swh-scheduler':
scheduling = SWHScheduling(override_config)
else:
raise ValueError(
'Only `celery` or `swh-scheduler` values are accepted')
- for deposits in utils.grouper(prepare_task_arguments(server), batch_size):
+ for deposits in utils.grouper(prepare_task_arguments(), batch_size):
scheduling.schedule(deposits)
if __name__ == '__main__':
main()
diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py
index 1d0d0f07..5f6f7286 100644
--- a/swh/deposit/signals.py
+++ b/swh/deposit/signals.py
@@ -1,74 +1,73 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module in charge of defining some uncoupled actions on deposit.
Typically, checking that the archives deposited are ok are not
directly testing in the request/answer to avoid too long
computations.
So this is done in the deposit_on_status_ready_for_check callback.
"""
import datetime
from django.db.models.signals import post_save
from django.dispatch import receiver
from .models import DepositRequest
from .config import SWHDefaultConfig
@receiver(post_save, sender=DepositRequest)
def deposit_on_status_ready_for_check(sender, instance, created, raw, using,
update_fields, **kwargs):
"""Check the status is ready for check.
If so, try and check the associated archives.
If not, move along.
When
Triggered when a deposit is saved.
Args:
sender (DepositRequest): The model class
instance (DepositRequest): The actual instance being saved
created (bool): True if a new record was created
raw (bool): True if the model is saved exactly as presented
(i.e. when loading a fixture). One should not
query/modify other records in the database as the
database might not be in a consistent state yet
using: The database alias being used
update_fields: The set of fields to update as passed to
Model.save(), or None if update_fields wasn’t
passed to save()
"""
default_config = SWHDefaultConfig()
if not default_config.config['checks']:
return
# Schedule oneshot task for checking archives
from swh.deposit.config import PRIVATE_CHECK_DEPOSIT
from django.core.urlresolvers import reverse
- # FIXME: Generate absolute uri
args = [instance.deposit.collection.name, instance.deposit.id]
archive_check_url = reverse(
PRIVATE_CHECK_DEPOSIT, args=args)
task = {
'policy': 'oneshot',
'type': 'swh-deposit-archive-checks',
'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
'arguments': {
'args': [],
'kwargs': {
'archive_check_url': archive_check_url,
},
}
}
default_config.scheduler.create_tasks([task])
diff --git a/swh/deposit/tests/injection/common.py b/swh/deposit/tests/injection/common.py
new file mode 100644
index 00000000..8d453589
--- /dev/null
+++ b/swh/deposit/tests/injection/common.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+
+from swh.deposit.injection.client import DepositClient
+
+
+CLIENT_TEST_CONFIG = {
+ 'url': 'http://nowhere:9000/',
+ 'auth': {}, # no authentication in test scenario
+}
+
+
+class SWHDepositTestClient(DepositClient):
+ """Deposit test client to permit overriding the default request
+ client.
+
+ """
+ def __init__(self, client, config):
+ super().__init__(config=config)
+ self.client = client
+
+ def archive_get(self, archive_update_url, archive_path, log=None):
+ r = self.client.get(archive_update_url)
+ with open(archive_path, 'wb') as f:
+ for chunk in r.streaming_content:
+ f.write(chunk)
+
+ return archive_path
+
+ def metadata_get(self, metadata_url, log=None):
+ r = self.client.get(metadata_url)
+ return json.loads(r.content.decode('utf-8'))
+
+ def status_update(self, update_status_url, status, revision_id=None):
+ payload = {'status': status}
+ if revision_id:
+ payload['revision_id'] = revision_id
+ self.client.put(update_status_url,
+ content_type='application/json',
+ data=json.dumps(payload))
+
+ def check(self, check_url):
+ r = self.client.get(check_url)
+ data = json.loads(r.content.decode('utf-8'))
+ return data['status']
diff --git a/swh/deposit/tests/injection/test_checker.py b/swh/deposit/tests/injection/test_checker.py
new file mode 100644
index 00000000..23899e19
--- /dev/null
+++ b/swh/deposit/tests/injection/test_checker.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from nose.tools import istest
+from rest_framework.test import APITestCase
+
+from swh.deposit.models import Deposit
+from swh.deposit.config import PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_READY
+from swh.deposit.config import DEPOSIT_STATUS_REJECTED
+from swh.deposit.injection.checker import DepositChecker
+from django.core.urlresolvers import reverse
+
+
+from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG
+from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine
+from ..common import FileSystemCreationRoutine
+
+
+class DepositCheckerScenarioTest(APITestCase, WithAuthTestCase,
+ BasicTestCase, CommonCreationRoutine,
+ FileSystemCreationRoutine):
+
+ def setUp(self):
+ super().setUp()
+
+ # 2. Sets a basic client which accesses the test data
+ checker_client = SWHDepositTestClient(client=self.client,
+ config=CLIENT_TEST_CONFIG)
+ # 3. setup loader with no persistence and that client
+ self.checker = DepositChecker(client=checker_client)
+
+ @istest
+ def check_deposit_ready(self):
+ """Check a valid deposit ready-for-checks should result in ready state
+
+ """
+ # 1. create a deposit with archive and metadata
+ deposit_id = self.create_simple_binary_deposit()
+
+ args = [self.collection.name, deposit_id]
+ deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)
+
+ # when
+ actual_status = self.checker.check(deposit_check_url=deposit_check_url)
+
+ # then
+ deposit = Deposit.objects.get(pk=deposit_id)
+ self.assertEquals(deposit.status, DEPOSIT_STATUS_READY)
+ self.assertEquals(actual_status, DEPOSIT_STATUS_READY)
+
+ @istest
+ def check_deposit_rejected(self):
+ """Check an invalid deposit ready-for-checks should result in rejected
+
+ """
+ # 1. create a deposit with archive and metadata
+ deposit_id = self.create_invalid_deposit()
+
+ args = [self.collection.name, deposit_id]
+ deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)
+
+ # when
+ actual_status = self.checker.check(deposit_check_url=deposit_check_url)
+
+ # then
+ deposit = Deposit.objects.get(pk=deposit_id)
+ self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED)
+ self.assertEquals(actual_status, DEPOSIT_STATUS_REJECTED)
diff --git a/swh/deposit/tests/injection/test_client.py b/swh/deposit/tests/injection/test_client.py
index c3290e8d..b30366c4 100644
--- a/swh/deposit/tests/injection/test_client.py
+++ b/swh/deposit/tests/injection/test_client.py
@@ -1,226 +1,265 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import shutil
import tempfile
import unittest
from nose.plugins.attrib import attr
from nose.tools import istest
from swh.deposit.injection.client import DepositClient
+from .common import CLIENT_TEST_CONFIG
+
class StreamedResponse:
    """Streamed response facsimile

    Exposes the ``ok`` attribute and ``iter_content`` method the
    deposit client reads on a response.

    """
    def __init__(self, ok, stream):
        self.ok = ok
        self.stream = stream

    def iter_content(self):
        """Yield the chunks of the wrapped stream, in order."""
        yield from self.stream
class FakeRequestClientGet:
    """Fake request client dedicated to get method calls.

    Records the arguments of the last ``get`` call and always answers
    with the response provided at construction time.

    """
    def __init__(self, response):
        self.response = response

    def get(self, *args, **kwargs):
        """Keep the call's arguments for later inspection and return the
           canned response.

        """
        self.args = args
        self.kwargs = kwargs
        return self.response
@attr('fs')
class DepositClientReadArchiveTest(unittest.TestCase):
def setUp(self):
    """Create the temporary directory the archive is written to."""
    super().setUp()
    self.temporary_directory = tempfile.mkdtemp(dir='/tmp')
def tearDown(self):
    """Remove the temporary directory created by setUp."""
    # Chain to the parent's tearDown (the previous code called the
    # parent's setUp again here)
    super().tearDown()
    shutil.rmtree(self.temporary_directory)
@istest
def archive_get(self):
"""Reading archive should write data in temporary directory
"""
stream_content = [b"some", b"streamed", b"response"]
response = StreamedResponse(
ok=True,
stream=(s for s in stream_content))
_client = FakeRequestClientGet(response)
- deposit_client = DepositClient(config={}, _client=_client)
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
archive_path = os.path.join(self.temporary_directory, 'test.archive')
- archive_path = deposit_client.archive_get(
- 'http://nowhere:9000/some/url', archive_path)
+ archive_path = deposit_client.archive_get('/some/url', archive_path)
self.assertTrue(os.path.exists(archive_path))
with open(archive_path, 'rb') as f:
actual_content = f.read()
self.assertEquals(actual_content, b''.join(stream_content))
- self.assertEquals(_client.args,
- ('http://nowhere:9000/some/url', ))
+ self.assertEquals(_client.args, ('http://nowhere:9000/some/url', ))
self.assertEquals(_client.kwargs, {
'stream': True
})
@istest
def archive_get_with_authentication(self):
"""Reading archive should write data in temporary directory
"""
- stream_content = [b"some", b"streamed", b"response"]
+ stream_content = [b"some", b"streamed", b"response", b"for", b"auth"]
response = StreamedResponse(
ok=True,
stream=(s for s in stream_content))
_client = FakeRequestClientGet(response)
- deposit_client = DepositClient(config={
+ _config = CLIENT_TEST_CONFIG.copy()
+ _config['auth'] = { # add authentication setup
'username': 'user',
'password': 'pass'
- },
- _client=_client)
+ }
+ deposit_client = DepositClient(_config, _client=_client)
archive_path = os.path.join(self.temporary_directory, 'test.archive')
- archive_path = deposit_client.archive_get(
- 'http://nowhere:9000/some/url', archive_path)
+ archive_path = deposit_client.archive_get('/some/url', archive_path)
self.assertTrue(os.path.exists(archive_path))
with open(archive_path, 'rb') as f:
actual_content = f.read()
self.assertEquals(actual_content, b''.join(stream_content))
- self.assertEquals(_client.args,
- ('http://nowhere:9000/some/url', ))
+ self.assertEquals(_client.args, ('http://nowhere:9000/some/url', ))
self.assertEquals(_client.kwargs, {
'stream': True,
'auth': ('user', 'pass')
})
@istest
def archive_get_can_fail(self):
"""Reading archive can fail for some reasons
"""
response = StreamedResponse(ok=False, stream=None)
_client = FakeRequestClientGet(response)
- deposit_client = DepositClient(config={}, _client=_client)
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
- url = 'http://nowhere:9001/some/url'
with self.assertRaisesRegex(
ValueError,
- 'Problem when retrieving deposit archive at %s' % url):
- deposit_client.archive_get(url, 'some/path')
+ 'Problem when retrieving deposit archive'):
+ deposit_client.archive_get('/some/url', 'some/path')
class JsonResponse:
    """Json response facsimile

    Exposes the ``ok`` attribute and ``json`` method the deposit client
    reads on a response.

    """
    def __init__(self, ok, response):
        self.ok = ok
        self.response = response

    def json(self):
        """Return the canned response, untouched."""
        return self.response
class DepositClientReadMetadataTest(unittest.TestCase):
@istest
def metadata_get(self):
"""Reading archive should write data in temporary directory
"""
expected_response = {"some": "dict"}
response = JsonResponse(
ok=True,
response=expected_response)
_client = FakeRequestClientGet(response)
- deposit_client = DepositClient(config={}, _client=_client)
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
- actual_metadata = deposit_client.metadata_get(
- 'http://nowhere:9000/metadata')
+ actual_metadata = deposit_client.metadata_get('/metadata')
self.assertEquals(actual_metadata, expected_response)
@istest
def metadata_get_can_fail(self):
"""Reading metadata can fail for some reasons
"""
_client = FakeRequestClientGet(JsonResponse(ok=False, response=None))
- deposit_client = DepositClient(config={}, _client=_client)
- url = 'http://nowhere:9001/some/metadata'
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
with self.assertRaisesRegex(
ValueError,
- 'Problem when retrieving metadata at %s' % url):
- deposit_client.metadata_get(url)
+ 'Problem when retrieving metadata at'):
+ deposit_client.metadata_get('/some/metadata/url')
class FakeRequestClientPut:
    """Fake Request client dedicated to put request method calls.

    Records the arguments of the last ``put`` call; both attributes
    stay None until ``put`` is called.

    """
    args = None
    kwargs = None

    def put(self, *args, **kwargs):
        """Keep the call's arguments for later inspection."""
        self.args = args
        self.kwargs = kwargs
class DepositClientStatusUpdateTest(unittest.TestCase):
@istest
def status_update(self):
"""Update status
"""
_client = FakeRequestClientPut()
- deposit_client = DepositClient(config={}, _client=_client)
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
- deposit_client.status_update('http://nowhere:9000/update/status',
+ deposit_client.status_update('/update/status',
'success', revision_id='some-revision-id')
self.assertEquals(_client.args,
('http://nowhere:9000/update/status', ))
self.assertEquals(_client.kwargs, {
'json': {
'status': 'success',
'revision_id': 'some-revision-id',
}
})
@istest
def status_update_with_no_revision_id(self):
"""Reading metadata can fail for some reasons
"""
_client = FakeRequestClientPut()
- deposit_client = DepositClient(config={}, _client=_client)
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
- deposit_client.status_update('http://nowhere:9001/update/status',
- 'failure')
+ deposit_client.status_update('/update/status/fail', 'failure')
self.assertEquals(_client.args,
- ('http://nowhere:9001/update/status', ))
+ ('http://nowhere:9000/update/status/fail', ))
self.assertEquals(_client.kwargs, {
'json': {
'status': 'failure',
}
})
+
+
+class DepositClientCheckTest(unittest.TestCase):
+ @istest
+ def check(self):
+ """When check ok, this should return the deposit's status
+
+ """
+ _client = FakeRequestClientGet(
+ JsonResponse(ok=True, response={'status': 'something'}))
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
+
+ r = deposit_client.check('/check')
+
+ self.assertEquals(_client.args,
+ ('http://nowhere:9000/check', ))
+ self.assertEquals(_client.kwargs, {})
+ self.assertEquals(r, 'something')
+
+ @istest
+ def check_fails(self):
+ """Checking deposit can fail for some reason
+
+ """
+ _client = FakeRequestClientGet(
+ JsonResponse(ok=False, response=None))
+ deposit_client = DepositClient(config=CLIENT_TEST_CONFIG,
+ _client=_client)
+
+ with self.assertRaisesRegex(
+ ValueError,
+ 'Problem when checking deposit'):
+ deposit_client.check('/check/fails')
+
+ self.assertEquals(_client.args,
+ ('http://nowhere:9000/check/fails', ))
+ self.assertEquals(_client.kwargs, {})
diff --git a/swh/deposit/tests/injection/test_loader.py b/swh/deposit/tests/injection/test_loader.py
index cf9b198b..300e745f 100644
--- a/swh/deposit/tests/injection/test_loader.py
+++ b/swh/deposit/tests/injection/test_loader.py
@@ -1,322 +1,285 @@
-# Copyright (C) 2016-2017 The Software Heritage developers
+# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import json
import os
import unittest
import shutil
from nose.tools import istest
from nose.plugins.attrib import attr
from rest_framework.test import APITestCase
from swh.model import hashutil
from swh.deposit.injection.loader import DepositLoader
-from swh.deposit.injection.client import DepositClient
from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
from swh.deposit.config import PRIVATE_PUT_DEPOSIT
from django.core.urlresolvers import reverse
-
+from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG
from .. import TEST_LOADER_CONFIG
from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine
from ..common import FileSystemCreationRoutine
TOOL_ID = 99
PROVIDER_ID = 12
class DepositLoaderInhibitsStorage:
    """Mixin class to inhibit the persistence and keep in memory the data
    sent for storage.

    Every ``send_*``/``maybe_load_*`` method records what it receives in
    ``self.state`` (one list per object type) instead of storing it.

    cf. SWHDepositLoaderNoStorage

    """
    def __init__(self, client=None):
        # client is not used here, transit it nonetheless to other mixins
        super().__init__(client=client)
        # typed data
        self.state = {
            'origin': [],
            'origin_visit': [],
            'origin_metadata': [],
            'content': [],
            'directory': [],
            'revision': [],
            'release': [],
            'occurrence': [],
            'tool': [],
            'provider': []
        }

    def _add(self, type, l):
        """Add without duplicates and keeping the insertion order.

        Args:
            type (str): Type of objects concerned by the action
            l ([object]): List of 'type' object

        """
        col = self.state[type]
        for o in l:
            if o in col:
                continue
            col.append(o)

    def send_origin(self, origin):
        """Record the origin, forcing its 'id' to 1, and return that id."""
        origin.update({'id': 1})
        self._add('origin', [origin])
        return origin['id']

    def send_origin_visit(self, origin_id, visit_date):
        """Record and return an origin visit whose 'visit' is always 1."""
        origin_visit = {
            'origin': origin_id,
            'visit_date': visit_date,
            'visit': 1,
        }
        self._add('origin_visit', [origin_visit])
        return origin_visit

    def send_origin_metadata(self, origin_id, visit_date, provider_id, tool_id,
                             metadata):
        """Record and return the origin metadata built from the arguments."""
        origin_metadata = {
            'origin_id': origin_id,
            'visit_date': visit_date,
            'provider_id': provider_id,
            'tool_id': tool_id,
            'metadata': metadata
        }
        self._add('origin_metadata', [origin_metadata])
        return origin_metadata

    def send_tool(self, tool):
        """Record the tool's name/version/configuration, return TOOL_ID."""
        tool = {
            'tool_name': tool['tool_name'],
            'tool_version': tool['tool_version'],
            'tool_configuration': tool['tool_configuration']
        }
        self._add('tool', [tool])
        tool_id = TOOL_ID
        return tool_id

    def send_provider(self, provider):
        """Record the provider's name/type/url/metadata, return PROVIDER_ID."""
        provider = {
            'provider_name': provider['provider_name'],
            'provider_type': provider['provider_type'],
            'provider_url': provider['provider_url'],
            'metadata': provider['metadata']
        }
        self._add('provider', [provider])
        provider_id = PROVIDER_ID
        return provider_id

    def maybe_load_contents(self, contents):
        self._add('content', contents)

    def maybe_load_directories(self, directories):
        self._add('directory', directories)

    def maybe_load_revisions(self, revisions):
        self._add('revision', revisions)

    def maybe_load_releases(self, releases):
        self._add('release', releases)

    def maybe_load_occurrences(self, occurrences):
        self._add('occurrence', occurrences)

    def open_fetch_history(self):
        pass

    def close_fetch_history_failure(self, fetch_history_id):
        pass

    def close_fetch_history_success(self, fetch_history_id):
        pass

    def update_origin_visit(self, origin_id, visit, status):
        """Only remember the status the visit was updated with."""
        self.status = status

    # Override to do nothing at the end
    def close_failure(self):
        pass

    def close_success(self):
        pass
class TestLoaderUtils(unittest.TestCase):
    """Assertion helpers to inspect a loader's in-memory state."""
    def assertRevisionsOk(self, expected_revisions):
        """Check the loader's revisions match the expected revisions.

        Expects self.loader to be instantiated and ready to be
        inspected (meaning the loading took place).

        Args:
            expected_revisions (dict): Dict with key revision id,
            value the targeted directory id.

        """
        for rev in self.loader.state['revision']:
            rev_id = hashutil.hash_to_hex(rev['id'])
            directory_id = hashutil.hash_to_hex(rev['directory'])
            # Report an unexpected revision as a test failure rather
            # than as a KeyError
            self.assertIn(rev_id, expected_revisions)
            self.assertEqual(expected_revisions[rev_id], directory_id)
class SWHDepositLoaderNoStorage(DepositLoaderInhibitsStorage, DepositLoader):
    """Loader under test.

    It derives from the actual deposit loader, so that its real
    behavior is the one being tested, and from
    DepositLoaderInhibitsStorage, so that no persistence takes place.

    """
-class SWHDepositTestClient(DepositClient):
- """Deposit test client to permit overriding the default request
- client.
-
- """
- def __init__(self, client, config):
- super().__init__(config=config)
- self.client = client
-
- def archive_get(self, archive_update_url, archive_path, log=None):
- r = self.client.get(archive_update_url)
- with open(archive_path, 'wb') as f:
- for chunk in r.streaming_content:
- f.write(chunk)
-
- return archive_path
-
- def metadata_get(self, metadata_url, log=None):
- r = self.client.get(metadata_url)
- return json.loads(r.content.decode('utf-8'))
-
- def status_update(self, update_status_url, status, revision_id=None):
- payload = {'status': status}
- if revision_id:
- payload['revision_id'] = revision_id
- self.client.put(update_status_url,
- content_type='application/json',
- data=json.dumps(payload))
-
-
@attr('fs')
class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase,
BasicTestCase, CommonCreationRoutine,
FileSystemCreationRoutine, TestLoaderUtils):
def setUp(self):
super().setUp()
# create the extraction dir used by the loader
os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True)
- self.server = 'http://localhost/'
-
# 1. create a deposit with archive and metadata
self.deposit_id = self.create_simple_binary_deposit()
# 2. Sets a basic client which accesses the test data
- loader_client = SWHDepositTestClient(self.client, config={})
+ loader_client = SWHDepositTestClient(self.client,
+ config=CLIENT_TEST_CONFIG)
# 3. setup loader with no persistence and that client
self.loader = SWHDepositLoaderNoStorage(client=loader_client)
def tearDown(self):
    """Run the parent teardown, then delete the extraction directory
    named in TEST_LOADER_CONFIG.

    """
    super().tearDown()
    extraction_dir = TEST_LOADER_CONFIG['extraction_dir']
    shutil.rmtree(extraction_dir)
@istest
def inject_deposit_ready(self):
    """Load a deposit which is ready

    The loader is expected to have recorded 1 content, 1 directory,
    1 revision, no release and 1 occurrence.

    """
    args = [self.collection.name, self.deposit_id]

    archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
    deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
    deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)

    # when
    self.loader.load(archive_url=archive_url,
                     deposit_meta_url=deposit_meta_url,
                     deposit_update_url=deposit_update_url)

    # then
    # (assertEqual: the assertEquals alias is deprecated and no longer
    # exists in recent Python versions)
    self.assertEqual(len(self.loader.state['content']), 1)
    self.assertEqual(len(self.loader.state['directory']), 1)
    self.assertEqual(len(self.loader.state['revision']), 1)
    self.assertEqual(len(self.loader.state['release']), 0)
    self.assertEqual(len(self.loader.state['occurrence']), 1)
- # FIXME enrich state introspection
- # expected_revisions = {}
- # self.assertRevisionsOk(expected_revisions)
-
@istest
def inject_deposit_verify_metadata(self):
    """Load a deposit with metadata, test metadata integrity

    On top of the object counts, the single recorded origin metadata
    entry must carry the expected metadata dict, TOOL_ID and
    PROVIDER_ID.

    """
    self.deposit_metadata_id = self.add_metadata_to_deposit(
        self.deposit_id)
    args = [self.collection.name, self.deposit_metadata_id]

    archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
    deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
    deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)

    # when
    self.loader.load(archive_url=archive_url,
                     deposit_meta_url=deposit_meta_url,
                     deposit_update_url=deposit_update_url)

    # then
    # (assertEqual: the assertEquals alias is deprecated and no longer
    # exists in recent Python versions)
    self.assertEqual(len(self.loader.state['content']), 1)
    self.assertEqual(len(self.loader.state['directory']), 1)
    self.assertEqual(len(self.loader.state['revision']), 1)
    self.assertEqual(len(self.loader.state['release']), 0)
    self.assertEqual(len(self.loader.state['occurrence']), 1)
    self.assertEqual(len(self.loader.state['origin_metadata']), 1)
    self.assertEqual(len(self.loader.state['tool']), 1)
    self.assertEqual(len(self.loader.state['provider']), 1)

    # Metadata keys are namespace-qualified tag names
    atom = '{http://www.w3.org/2005/Atom}'
    codemeta = '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}'
    expected_origin_metadata = {
        atom + 'author': {
            atom + 'email': 'hal@ccsd.cnrs.fr',
            atom + 'name': 'HAL'
        },
        codemeta + 'url':
        'https://hal-test.archives-ouvertes.fr/hal-01243065',
        codemeta + 'runtimePlatform': 'phpstorm',
        codemeta + 'license': {
            codemeta + 'name':
            'CeCILL Free Software License Agreement v1.1'
        },
        codemeta + 'programmingLanguage': 'C',
        codemeta + 'applicationCategory': 'test',
        codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00',
        codemeta + 'version': 1,
        atom + 'external_identifier': 'hal-01243065',
        atom + 'title': 'Composing a Web of Audio Applications',
        codemeta + 'description': 'this is the description',
        atom + 'id': 'hal-01243065',
        atom + 'client': 'hal',
        codemeta + 'keywords': 'DSP programming,Web',
        codemeta + 'developmentStatus': 'stable'
    }
    result = self.loader.state['origin_metadata'][0]
    self.assertEqual(result['metadata'], expected_origin_metadata)
    self.assertEqual(result['tool_id'], TOOL_ID)
    self.assertEqual(result['provider_id'], PROVIDER_ID)

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 7:24 PM (7 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3253613

Event Timeline