diff --git a/swh/deposit/tests/__init__.py b/swh/deposit/tests/__init__.py index de3abb55..7f71a48c 100644 --- a/swh/deposit/tests/__init__.py +++ b/swh/deposit/tests/__init__.py @@ -1,71 +1,71 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit.config import setup_django_for from swh.deposit.config import SWHDefaultConfig # noqa from swh.loader.core.loader import SWHLoader TEST_CONFIG = { 'max_upload_size': 500, 'extraction_dir': '/tmp/swh-deposit/test/extraction-dir', 'checks': False, 'provider': { 'provider_name': '', 'provider_type': 'deposit_client', 'provider_url': '', 'metadata': { } }, 'tool': { - 'tool_name': 'swh-deposit', - 'tool_version': '0.0.1', - 'tool_configuration': { + 'name': 'swh-deposit', + 'version': '0.0.1', + 'configuration': { 'sword_version': '2' } } } def parse_deposit_config_file(base_filename=None, config_filename=None, additional_configs=None, global_config=True): return TEST_CONFIG TEST_LOADER_CONFIG = { 'extraction_dir': '/tmp/swh-loader-tar/test/', 'storage': { - 'cls': 'remote', + 'cls': 'memory', 'args': { - 'url': 'http://localhost:unexisting-port/', } }, - 'send_contents': False, - 'send_directories': False, - 'send_revisions': False, - 'send_releases': False, - 'send_snapshot': False, + 'send_contents': True, + 'send_directories': True, + 'send_revisions': True, + 'send_releases': True, + 'send_snapshot': True, + 'content_size_limit': 100 * 1024 * 1024, 'content_packet_size': 10, 'content_packet_size_bytes': 100 * 1024 * 1024, 'directory_packet_size': 10, 'revision_packet_size': 10, 'release_packet_size': 10, } def parse_loader_config_file(base_filename=None, config_filename=None, additional_configs=None, global_config=True): return TEST_LOADER_CONFIG # monkey patch classes method permits to override, for tests purposes, # the default configuration without side-effect, i.e do not load the # configuration from disk SWHDefaultConfig.parse_config_file = parse_deposit_config_file SWHLoader.parse_config_file = parse_loader_config_file setup_django_for('testing') diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py index 2d7ec66b..9f40e701 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,205 +1,205 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from ...config import SWH_PERSON from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access to read metadata information on deposit. """ def test_read_metadata(self): """Private metadata read api to existing deposit should return metadata """ deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { - 'tool_name': 'swh-deposit', - 'tool_version': '0.0.1', - 'tool_configuration': { + 'name': 'swh-deposit', + 'version': '0.0.1', + 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'committer_date': None, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'date': None, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'type': 'tar' }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_read_metadata_revision_with_parent(self): """Private read metadata to a deposit (with parent) returns metadata """ swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' swh_persistent_id = 'swh:1:rev:%s' % swh_id deposit_id1 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id', swh_id=swh_persistent_id) deposit_parent = Deposit.objects.get(pk=deposit_id1) self.assertEqual(deposit_parent.swh_id, swh_persistent_id) self.assertEqual(deposit_parent.external_id, 'some-external-id') self.assertEqual(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id = self.create_deposit_partial( external_id='some-external-id') deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.external_id, 'some-external-id') self.assertEqual(deposit.swh_id, None) self.assertEqual(deposit.parent, deposit_parent) self.assertEqual(deposit.status, DEPOSIT_STATUS_PARTIAL) url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { - 'tool_name': 'swh-deposit', - 'tool_version': '0.0.1', - 'tool_configuration': { + 'name': 'swh-deposit', + 'version': '0.0.1', + 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'date': None, 'committer_date': None, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'parents': [swh_id] }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, unknown_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) def test_access_to_nonexisting_collection_returns_404_response(self): """Read unknown deposit should return a 404 response """ collection_name = 'non-existing' deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection_name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Unknown collection name %s' % collection_name, response.content.decode('utf-8'),) diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py index cb6e81d8..e8cea274 100644 --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -1,305 +1,169 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import unittest import shutil import pytest from rest_framework.test import APITestCase from swh.model import hashutil from swh.deposit.models import Deposit from swh.deposit.loader import loader from swh.deposit.config import ( PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT ) from django.core.urlresolvers import reverse +from swh.loader.core.tests import BaseLoaderStorageTest from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from .. import TEST_LOADER_CONFIG from ..common import (BasicTestCase, WithAuthTestCase, CommonCreationRoutine, FileSystemCreationRoutine) -TOOL_ID = 99 -PROVIDER_ID = 12 - - -class DepositLoaderInhibitsStorage: - """Mixin class to inhibit the persistence and keep in memory the data - sent for storage. - - cf. SWHDepositLoaderNoStorage - - """ - def __init__(self, client=None): - # client is not used here, transit it nonetheless to other mixins - super().__init__(client=client) - # typed data - self.state = { - 'origin': [], - 'origin_visit': [], - 'origin_metadata': [], - 'content': [], - 'directory': [], - 'revision': [], - 'release': [], - 'snapshot': [], - 'tool': [], - 'provider': [] - } - - def _add(self, type, l): - """Add without duplicates and keeping the insertion order. - - Args: - type (str): Type of objects concerned by the action - l ([object]): List of 'type' object - - """ - col = self.state[type] - for o in l: - if o in col: - continue - col.extend([o]) - - def send_origin(self, origin): - origin.update({'id': 1}) - self._add('origin', [origin]) - return origin['id'] - - def send_origin_visit(self, origin_id, visit_date): - origin_visit = { - 'origin': origin_id, - 'visit_date': visit_date, - 'visit': 1, - } - self._add('origin_visit', [origin_visit]) - return origin_visit - - def send_origin_metadata(self, origin_id, visit_date, provider_id, tool_id, - metadata): - origin_metadata = { - 'origin_id': origin_id, - 'visit_date': visit_date, - 'provider_id': provider_id, - 'tool_id': tool_id, - 'metadata': metadata - } - self._add('origin_metadata', [origin_metadata]) - return origin_metadata - - def send_tool(self, tool): - tool = { - 'tool_name': tool['tool_name'], - 'tool_version': tool['tool_version'], - 'tool_configuration': tool['tool_configuration'] - } - self._add('tool', [tool]) - tool_id = TOOL_ID - return tool_id - - def send_provider(self, provider): - provider = { - 'provider_name': provider['provider_name'], - 'provider_type': provider['provider_type'], - 'provider_url': provider['provider_url'], - 'metadata': provider['metadata'] - } - self._add('provider', [provider]) - provider_id = PROVIDER_ID - return provider_id - - def maybe_load_contents(self, contents): - self._add('content', contents) - - def maybe_load_directories(self, directories): - self._add('directory', directories) - - def maybe_load_revisions(self, revisions): - self._add('revision', revisions) - - def maybe_load_releases(self, releases): - self._add('release', releases) - - def maybe_load_snapshot(self, snapshot): - self._add('snapshot', [snapshot]) - - def open_fetch_history(self): - pass - - def close_fetch_history_failure(self, fetch_history_id): - pass - - def close_fetch_history_success(self, fetch_history_id): - pass - - def update_origin_visit(self, origin_id, visit, status): - self.status = status - - # Override to do nothing at the end - def close_failure(self): - pass - - def close_success(self): - pass - - class TestLoaderUtils(unittest.TestCase): def assertRevisionsOk(self, expected_revisions): # noqa: N802 """Check the loader's revisions match the expected revisions. Expects self.loader to be instantiated and ready to be inspected (meaning the loading took place). Args: expected_revisions (dict): Dict with key revision id, value the targeted directory id. """ # The last revision being the one used later to start back from for rev in self.loader.state['revision']: rev_id = hashutil.hash_to_hex(rev['id']) directory_id = hashutil.hash_to_hex(rev['directory']) self.assertEqual(expected_revisions[rev_id], directory_id) -class SWHDepositLoaderNoStorage(DepositLoaderInhibitsStorage, - loader.DepositLoader): - """Loader to test. - - It inherits from the actual deposit loader to actually test its - correct behavior. It also inherits from - DepositLoaderInhibitsStorage so that no persistence takes place. - - """ - pass - - @pytest.mark.fs class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, - FileSystemCreationRoutine, TestLoaderUtils): + FileSystemCreationRoutine, TestLoaderUtils, + BaseLoaderStorageTest): def setUp(self): super().setUp() # create the extraction dir used by the loader os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) # 1. create a deposit with archive and metadata self.deposit_id = self.create_simple_binary_deposit() # 2. Sets a basic client which accesses the test data loader_client = SWHDepositTestClient(self.client, config=CLIENT_TEST_CONFIG) - # 3. setup loader with no persistence and that client - self.loader = SWHDepositLoaderNoStorage(client=loader_client) + # 3. setup loader with that client + self.loader = loader.DepositLoader(client=loader_client) + + self.storage = self.loader.storage def tearDown(self): super().tearDown() shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir']) def test_inject_deposit_ready(self): """Load a deposit which is ready """ args = [self.collection.name, self.deposit_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) # when - self.loader.load(archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) + res = self.loader.load(archive_url=archive_url, + deposit_meta_url=deposit_meta_url, + deposit_update_url=deposit_update_url) # then - self.assertEqual(len(self.loader.state['content']), 1) - self.assertEqual(len(self.loader.state['directory']), 1) - self.assertEqual(len(self.loader.state['revision']), 1) - self.assertEqual(len(self.loader.state['release']), 0) - self.assertEqual(len(self.loader.state['snapshot']), 1) + self.assertEqual(res['status'], 'eventful', res) + self.assertCountContents(1) + self.assertCountDirectories(1) + self.assertCountRevisions(1) + self.assertCountReleases(0) + self.assertCountSnapshots(1) def test_inject_deposit_verify_metadata(self): """Load a deposit with metadata, test metadata integrity """ self.deposit_metadata_id = self.add_metadata_to_deposit( self.deposit_id) args = [self.collection.name, self.deposit_metadata_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) # when self.loader.load(archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) # then - self.assertEqual(len(self.loader.state['content']), 1) - self.assertEqual(len(self.loader.state['directory']), 1) - self.assertEqual(len(self.loader.state['revision']), 1) - self.assertEqual(len(self.loader.state['release']), 0) - self.assertEqual(len(self.loader.state['snapshot']), 1) - self.assertEqual(len(self.loader.state['origin_metadata']), 1) - self.assertEqual(len(self.loader.state['tool']), 1) - self.assertEqual(len(self.loader.state['provider']), 1) + self.assertCountContents(1) + self.assertCountDirectories(1) + self.assertCountRevisions(1) + self.assertCountReleases(0) + self.assertCountSnapshots(1) codemeta = 'codemeta:' origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065' expected_origin_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, codemeta + 'url': origin_url, codemeta + 'runtimePlatform': 'phpstorm', codemeta + 'license': [ { codemeta + 'name': 'GNU General Public License v3.0 only' }, { codemeta + 'name': 'CeCILL Free Software License Agreement v1.1' # noqa } ], codemeta + 'author': { codemeta + 'name': 'Morane Gruenpeter' }, codemeta + 'programmingLanguage': ['php', 'python', 'C'], codemeta + 'applicationCategory': 'test', codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00', codemeta + 'version': '1', 'external_identifier': 'hal-01243065', 'title': 'Composing a Web of Audio Applications', codemeta + 'description': 'this is the description', 'id': 'hal-01243065', 'client': 'hal', codemeta + 'keywords': 'DSP programming,Web', codemeta + 'developmentStatus': 'stable' } - result = self.loader.state['origin_metadata'][0] - self.assertEqual(result['metadata'], expected_origin_metadata) - self.assertEqual(result['tool_id'], TOOL_ID) - self.assertEqual(result['provider_id'], PROVIDER_ID) + self.assertOriginMetadataContains('deposit', origin_url, + expected_origin_metadata) deposit = Deposit.objects.get(pk=self.deposit_id) self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( deposit.swh_id, origin_url )) self.assertRegex(deposit.swh_anchor_id, r'^swh:1:rev:.*') self.assertEqual(deposit.swh_anchor_id_context, '%s;origin=%s' % ( deposit.swh_anchor_id, origin_url ))