Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/tests/loader/test_loader.py
Show All 11 Lines | |||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.deposit.models import Deposit | from swh.deposit.models import Deposit | ||||
from swh.deposit.loader import loader | from swh.deposit.loader import loader | ||||
from swh.deposit.config import ( | from swh.deposit.config import ( | ||||
PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT | PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT | ||||
) | ) | ||||
from django.core.urlresolvers import reverse | from django.core.urlresolvers import reverse | ||||
from swh.loader.core.tests import BaseLoaderStorageTest | |||||
from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG | from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG | ||||
from .. import TEST_LOADER_CONFIG | from .. import TEST_LOADER_CONFIG | ||||
from ..common import (BasicTestCase, WithAuthTestCase, | from ..common import (BasicTestCase, WithAuthTestCase, | ||||
CommonCreationRoutine, | CommonCreationRoutine, | ||||
FileSystemCreationRoutine) | FileSystemCreationRoutine) | ||||
TOOL_ID = 99 | |||||
PROVIDER_ID = 12 | |||||
class DepositLoaderInhibitsStorage: | |||||
"""Mixin class to inhibit the persistence and keep in memory the data | |||||
sent for storage. | |||||
cf. SWHDepositLoaderNoStorage | |||||
""" | |||||
def __init__(self, client=None): | |||||
# client is not used here, transit it nonetheless to other mixins | |||||
super().__init__(client=client) | |||||
# typed data | |||||
self.state = { | |||||
'origin': [], | |||||
'origin_visit': [], | |||||
'origin_metadata': [], | |||||
'content': [], | |||||
'directory': [], | |||||
'revision': [], | |||||
'release': [], | |||||
'snapshot': [], | |||||
'tool': [], | |||||
'provider': [] | |||||
} | |||||
def _add(self, type, l): | |||||
"""Add without duplicates and keeping the insertion order. | |||||
Args: | |||||
type (str): Type of objects concerned by the action | |||||
l ([object]): List of 'type' object | |||||
""" | |||||
col = self.state[type] | |||||
for o in l: | |||||
if o in col: | |||||
continue | |||||
col.extend([o]) | |||||
def send_origin(self, origin): | |||||
origin.update({'id': 1}) | |||||
self._add('origin', [origin]) | |||||
return origin['id'] | |||||
def send_origin_visit(self, origin_id, visit_date): | |||||
origin_visit = { | |||||
'origin': origin_id, | |||||
'visit_date': visit_date, | |||||
'visit': 1, | |||||
} | |||||
self._add('origin_visit', [origin_visit]) | |||||
return origin_visit | |||||
def send_origin_metadata(self, origin_id, visit_date, provider_id, tool_id, | |||||
metadata): | |||||
origin_metadata = { | |||||
'origin_id': origin_id, | |||||
'visit_date': visit_date, | |||||
'provider_id': provider_id, | |||||
'tool_id': tool_id, | |||||
'metadata': metadata | |||||
} | |||||
self._add('origin_metadata', [origin_metadata]) | |||||
return origin_metadata | |||||
def send_tool(self, tool): | |||||
tool = { | |||||
'tool_name': tool['tool_name'], | |||||
'tool_version': tool['tool_version'], | |||||
'tool_configuration': tool['tool_configuration'] | |||||
} | |||||
self._add('tool', [tool]) | |||||
tool_id = TOOL_ID | |||||
return tool_id | |||||
def send_provider(self, provider): | |||||
provider = { | |||||
'provider_name': provider['provider_name'], | |||||
'provider_type': provider['provider_type'], | |||||
'provider_url': provider['provider_url'], | |||||
'metadata': provider['metadata'] | |||||
} | |||||
self._add('provider', [provider]) | |||||
provider_id = PROVIDER_ID | |||||
return provider_id | |||||
def maybe_load_contents(self, contents): | |||||
self._add('content', contents) | |||||
def maybe_load_directories(self, directories): | |||||
self._add('directory', directories) | |||||
def maybe_load_revisions(self, revisions): | |||||
self._add('revision', revisions) | |||||
def maybe_load_releases(self, releases): | |||||
self._add('release', releases) | |||||
def maybe_load_snapshot(self, snapshot): | |||||
self._add('snapshot', [snapshot]) | |||||
def open_fetch_history(self): | |||||
pass | |||||
def close_fetch_history_failure(self, fetch_history_id): | |||||
pass | |||||
def close_fetch_history_success(self, fetch_history_id): | |||||
pass | |||||
def update_origin_visit(self, origin_id, visit, status): | |||||
self.status = status | |||||
# Override to do nothing at the end | |||||
def close_failure(self): | |||||
pass | |||||
def close_success(self): | |||||
pass | |||||
class TestLoaderUtils(unittest.TestCase): | class TestLoaderUtils(unittest.TestCase): | ||||
def assertRevisionsOk(self, expected_revisions): # noqa: N802 | def assertRevisionsOk(self, expected_revisions): # noqa: N802 | ||||
"""Check the loader's revisions match the expected revisions. | """Check the loader's revisions match the expected revisions. | ||||
Expects self.loader to be instantiated and ready to be | Expects self.loader to be instantiated and ready to be | ||||
inspected (meaning the loading took place). | inspected (meaning the loading took place). | ||||
Args: | Args: | ||||
expected_revisions (dict): Dict with key revision id, | expected_revisions (dict): Dict with key revision id, | ||||
value the targeted directory id. | value the targeted directory id. | ||||
""" | """ | ||||
# The last revision being the one used later to start back from | # The last revision being the one used later to start back from | ||||
for rev in self.loader.state['revision']: | for rev in self.loader.state['revision']: | ||||
rev_id = hashutil.hash_to_hex(rev['id']) | rev_id = hashutil.hash_to_hex(rev['id']) | ||||
directory_id = hashutil.hash_to_hex(rev['directory']) | directory_id = hashutil.hash_to_hex(rev['directory']) | ||||
self.assertEqual(expected_revisions[rev_id], directory_id) | self.assertEqual(expected_revisions[rev_id], directory_id) | ||||
class SWHDepositLoaderNoStorage(DepositLoaderInhibitsStorage, | |||||
loader.DepositLoader): | |||||
"""Loader to test. | |||||
It inherits from the actual deposit loader to actually test its | |||||
correct behavior. It also inherits from | |||||
DepositLoaderInhibitsStorage so that no persistence takes place. | |||||
""" | |||||
pass | |||||
@pytest.mark.fs | @pytest.mark.fs | ||||
class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase, | class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase, | ||||
BasicTestCase, CommonCreationRoutine, | BasicTestCase, CommonCreationRoutine, | ||||
FileSystemCreationRoutine, TestLoaderUtils): | FileSystemCreationRoutine, TestLoaderUtils, | ||||
BaseLoaderStorageTest): | |||||
def setUp(self): | def setUp(self): | ||||
super().setUp() | super().setUp() | ||||
# create the extraction dir used by the loader | # create the extraction dir used by the loader | ||||
os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) | os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) | ||||
# 1. create a deposit with archive and metadata | # 1. create a deposit with archive and metadata | ||||
self.deposit_id = self.create_simple_binary_deposit() | self.deposit_id = self.create_simple_binary_deposit() | ||||
# 2. Sets a basic client which accesses the test data | # 2. Sets a basic client which accesses the test data | ||||
loader_client = SWHDepositTestClient(self.client, | loader_client = SWHDepositTestClient(self.client, | ||||
config=CLIENT_TEST_CONFIG) | config=CLIENT_TEST_CONFIG) | ||||
# 3. setup loader with no persistence and that client | # 3. setup loader with that client | ||||
self.loader = SWHDepositLoaderNoStorage(client=loader_client) | self.loader = loader.DepositLoader(client=loader_client) | ||||
self.storage = self.loader.storage | |||||
def tearDown(self): | def tearDown(self): | ||||
super().tearDown() | super().tearDown() | ||||
shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir']) | shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir']) | ||||
def test_inject_deposit_ready(self): | def test_inject_deposit_ready(self): | ||||
"""Load a deposit which is ready | """Load a deposit which is ready | ||||
""" | """ | ||||
args = [self.collection.name, self.deposit_id] | args = [self.collection.name, self.deposit_id] | ||||
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) | archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) | ||||
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) | deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) | ||||
deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) | deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) | ||||
# when | # when | ||||
self.loader.load(archive_url=archive_url, | res = self.loader.load(archive_url=archive_url, | ||||
deposit_meta_url=deposit_meta_url, | deposit_meta_url=deposit_meta_url, | ||||
deposit_update_url=deposit_update_url) | deposit_update_url=deposit_update_url) | ||||
# then | # then | ||||
self.assertEqual(len(self.loader.state['content']), 1) | self.assertEqual(res['status'], 'eventful', res) | ||||
self.assertEqual(len(self.loader.state['directory']), 1) | self.assertCountContents(1) | ||||
self.assertEqual(len(self.loader.state['revision']), 1) | self.assertCountDirectories(1) | ||||
self.assertEqual(len(self.loader.state['release']), 0) | self.assertCountRevisions(1) | ||||
self.assertEqual(len(self.loader.state['snapshot']), 1) | self.assertCountReleases(0) | ||||
self.assertCountSnapshots(1) | |||||
def test_inject_deposit_verify_metadata(self): | def test_inject_deposit_verify_metadata(self): | ||||
"""Load a deposit with metadata, test metadata integrity | """Load a deposit with metadata, test metadata integrity | ||||
""" | """ | ||||
self.deposit_metadata_id = self.add_metadata_to_deposit( | self.deposit_metadata_id = self.add_metadata_to_deposit( | ||||
self.deposit_id) | self.deposit_id) | ||||
args = [self.collection.name, self.deposit_metadata_id] | args = [self.collection.name, self.deposit_metadata_id] | ||||
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) | archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) | ||||
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) | deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) | ||||
deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) | deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) | ||||
# when | # when | ||||
self.loader.load(archive_url=archive_url, | self.loader.load(archive_url=archive_url, | ||||
deposit_meta_url=deposit_meta_url, | deposit_meta_url=deposit_meta_url, | ||||
deposit_update_url=deposit_update_url) | deposit_update_url=deposit_update_url) | ||||
# then | # then | ||||
self.assertEqual(len(self.loader.state['content']), 1) | self.assertCountContents(1) | ||||
self.assertEqual(len(self.loader.state['directory']), 1) | self.assertCountDirectories(1) | ||||
self.assertEqual(len(self.loader.state['revision']), 1) | self.assertCountRevisions(1) | ||||
self.assertEqual(len(self.loader.state['release']), 0) | self.assertCountReleases(0) | ||||
self.assertEqual(len(self.loader.state['snapshot']), 1) | self.assertCountSnapshots(1) | ||||
self.assertEqual(len(self.loader.state['origin_metadata']), 1) | |||||
self.assertEqual(len(self.loader.state['tool']), 1) | |||||
self.assertEqual(len(self.loader.state['provider']), 1) | |||||
codemeta = 'codemeta:' | codemeta = 'codemeta:' | ||||
origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065' | origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065' | ||||
expected_origin_metadata = { | expected_origin_metadata = { | ||||
'@xmlns': 'http://www.w3.org/2005/Atom', | '@xmlns': 'http://www.w3.org/2005/Atom', | ||||
'@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', | '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', | ||||
'author': { | 'author': { | ||||
'email': 'hal@ccsd.cnrs.fr', | 'email': 'hal@ccsd.cnrs.fr', | ||||
Show All 19 Lines | def test_inject_deposit_verify_metadata(self): | ||||
'external_identifier': 'hal-01243065', | 'external_identifier': 'hal-01243065', | ||||
'title': 'Composing a Web of Audio Applications', | 'title': 'Composing a Web of Audio Applications', | ||||
codemeta + 'description': 'this is the description', | codemeta + 'description': 'this is the description', | ||||
'id': 'hal-01243065', | 'id': 'hal-01243065', | ||||
'client': 'hal', | 'client': 'hal', | ||||
codemeta + 'keywords': 'DSP programming,Web', | codemeta + 'keywords': 'DSP programming,Web', | ||||
codemeta + 'developmentStatus': 'stable' | codemeta + 'developmentStatus': 'stable' | ||||
} | } | ||||
result = self.loader.state['origin_metadata'][0] | self.assertOriginMetadataContains('deposit', origin_url, | ||||
self.assertEqual(result['metadata'], expected_origin_metadata) | expected_origin_metadata) | ||||
self.assertEqual(result['tool_id'], TOOL_ID) | |||||
self.assertEqual(result['provider_id'], PROVIDER_ID) | |||||
deposit = Deposit.objects.get(pk=self.deposit_id) | deposit = Deposit.objects.get(pk=self.deposit_id) | ||||
self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') | self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') | ||||
self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( | self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( | ||||
deposit.swh_id, origin_url | deposit.swh_id, origin_url | ||||
)) | )) | ||||
self.assertRegex(deposit.swh_anchor_id, r'^swh:1:rev:.*') | self.assertRegex(deposit.swh_anchor_id, r'^swh:1:rev:.*') | ||||
self.assertEqual(deposit.swh_anchor_id_context, '%s;origin=%s' % ( | self.assertEqual(deposit.swh_anchor_id_context, '%s;origin=%s' % ( | ||||
deposit.swh_anchor_id, origin_url | deposit.swh_anchor_id, origin_url | ||||
)) | )) |