diff --git a/swh/deposit/tests/api/test_deposit_list.py b/swh/deposit/tests/api/test_deposit_list.py index a3be748c..bcdfe9b9 100644 --- a/swh/deposit/tests/api/test_deposit_list.py +++ b/swh/deposit/tests/api/test_deposit_list.py @@ -1,86 +1,83 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import pytest - from django.urls import reverse from rest_framework import status from swh.deposit.api.converters import convert_status_detail from swh.deposit.config import ( DEPOSIT_STATUS_PARTIAL, PRIVATE_LIST_DEPOSITS, DEPOSIT_STATUS_DEPOSITED ) -@pytest.mark.django_db def test_deposit_list( partial_deposit, deposited_deposit, authenticated_client): """Deposit list api should return the deposits """ status_detail = { 'url': { 'summary': 'At least one compatible url field. Failed', 'fields': ['testurl'], }, 'metadata': [ { 'summary': 'Mandatory fields missing', 'fields': ['9', 10, 1.212], }, ], 'archive': [ { 'summary': 'Invalid archive', 'fields': ['3'], }, { 'summary': 'Unsupported archive', 'fields': [2], } ], } partial_deposit.status_detail = status_detail partial_deposit.save() deposit_id = partial_deposit.id deposit_id2 = deposited_deposit.id # NOTE: does not work as documented # https://docs.djangoproject.com/en/1.11/ref/urlresolvers/#django.core.urlresolvers.reverse # noqa # url = reverse(PRIVATE_LIST_DEPOSITS, kwargs={'page_size': 1}) main_url = reverse(PRIVATE_LIST_DEPOSITS) url = '%s?page_size=1' % main_url response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() assert data['count'] == 2 # 2 deposits expected_next = '%s?page=2&page_size=1' % main_url assert data['next'].endswith(expected_next) is True assert data['previous'] is None assert len(data['results']) == 1 # page of size 1 deposit = data['results'][0] assert deposit['id'] == deposit_id assert deposit['status'] == DEPOSIT_STATUS_PARTIAL expected_status_detail = convert_status_detail(status_detail) assert deposit['status_detail'] == expected_status_detail # then 2nd page response2 = authenticated_client.get(expected_next) assert response2.status_code == status.HTTP_200_OK data2 = response2.json() assert data2['count'] == 2 # still 2 deposits assert data2['next'] is None expected_previous = '%s?page_size=1' % main_url assert data2['previous'].endswith(expected_previous) is True assert len(data2['results']) == 1 # page of size 1 deposit2 = data2['results'][0] assert deposit2['id'] == deposit_id2 assert deposit2['status'] == DEPOSIT_STATUS_DEPOSITED diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py index f0e04cef..89d316a0 100644 --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -1,317 +1,316 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import base64 import pytest import psycopg2 from django.urls import reverse from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from rest_framework import status from rest_framework.test import APIClient from typing import Mapping from swh.scheduler.tests.conftest import * # noqa from swh.deposit.parsers import parse_xml from swh.deposit.config import ( COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, - DEPOSIT_STATUS_LOAD_FAILURE + DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_LOAD_FAILURE ) from swh.deposit.tests.common import create_arborescence_archive TEST_USER = { 'username': 'test', 'password': 'password', 'email': 'test@example.org', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'domain': 'archives-ouvertes.fr/', 'collection': { 'name': 'test' }, } -@pytest.fixture(autouse=True, scope='session') -def swh_proxy(): - """Automatically inject this fixture in all tests to ensure no outside - connection takes place. - - """ - os.environ['http_proxy'] = 'http://localhost:999' - os.environ['https_proxy'] = 'http://localhost:999' - - def execute_sql(sql): """Execute sql to postgres db""" with psycopg2.connect(database='postgres') as conn: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cur = conn.cursor() cur.execute(sql) @pytest.hookimpl(tryfirst=True) def pytest_load_initial_conftests(early_config, parser, args): """This hook is done prior to django loading. Used to initialize the deposit's server db. """ import project.app.signals def prepare_db(*args, **kwargs): from django.conf import settings db_name = 'tests' - print('before: %s' % settings.DATABASES) # work around db settings for django for k, v in [ ('ENGINE', 'django.db.backends.postgresql'), ('NAME', 'tests'), ('USER', postgresql_proc.user), # noqa ('HOST', postgresql_proc.host), # noqa ('PORT', postgresql_proc.port), # noqa ]: settings.DATABASES['default'][k] = v - print('after: %s' % settings.DATABASES) execute_sql('DROP DATABASE IF EXISTS %s' % db_name) execute_sql('CREATE DATABASE %s TEMPLATE template0' % db_name) project.app.signals.something = prepare_db +@pytest.fixture(autouse=True, scope='session') +def swh_proxy(): + """Automatically inject this fixture in all tests to ensure no outside + connection takes place. + + """ + os.environ['http_proxy'] = 'http://localhost:999' + os.environ['https_proxy'] = 'http://localhost:999' + + def create_deposit_collection(collection_name: str): """Create a deposit collection with name collection_name """ from swh.deposit.models import DepositCollection try: collection = DepositCollection._default_manager.get( name=collection_name) except DepositCollection.DoesNotExist: collection = DepositCollection(name=collection_name) collection.save() return collection def deposit_collection_factory( collection_name=TEST_USER['collection']['name']): @pytest.fixture def _deposit_collection(db, collection_name=collection_name): return create_deposit_collection(collection_name) return _deposit_collection deposit_collection = deposit_collection_factory() deposit_another_collection = deposit_collection_factory('another-collection') @pytest.fixture def deposit_user(db, deposit_collection): """Create/Return the test_user "test" """ from swh.deposit.models import DepositClient try: user = DepositClient._default_manager.get( username=TEST_USER['username']) except DepositClient.DoesNotExist: user = DepositClient._default_manager.create_user( username=TEST_USER['username'], email=TEST_USER['email'], password=TEST_USER['password'], provider_url=TEST_USER['provider_url'], domain=TEST_USER['domain'], ) user.collections = [deposit_collection.id] user.save() return user @pytest.fixture def client(): """Override pytest-django one which does not work for djangorestframework. """ return APIClient() # <- drf's client @pytest.yield_fixture def authenticated_client(client, deposit_user): """Returned a logged client """ _token = '%s:%s' % (deposit_user.username, TEST_USER['password']) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') client.credentials(HTTP_AUTHORIZATION=authorization) yield client client.logout() @pytest.fixture def sample_archive(tmp_path): """Returns a sample archive """ tmp_path = str(tmp_path) # pytest version limitation in previous version archive = create_arborescence_archive( tmp_path, 'archive1', 'file1', b'some content in file') return archive def create_deposit( authenticated_client, collection_name: str, sample_archive, external_id: str, deposit_status=DEPOSIT_STATUS_DEPOSITED): """Create a skeleton shell deposit """ url = reverse(COL_IRI, args=[collection_name]) # when response = authenticated_client.post( url, content_type='application/zip', # as zip data=sample_archive['data'], # + headers CONTENT_LENGTH=sample_archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( sample_archive['name'])) # then assert response.status_code == status.HTTP_201_CREATED from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(external_id=external_id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def create_binary_deposit( authenticated_client, collection_name: str, sample_archive, external_id: str, deposit_status: str = DEPOSIT_STATUS_DEPOSITED, atom_dataset: Mapping[str, bytes] = {}): """Create a deposit with both metadata and archive set. Then alters its status to `deposit_status`. """ deposit = create_deposit( authenticated_client, collection_name, sample_archive, external_id=external_id, deposit_status=DEPOSIT_STATUS_PARTIAL) response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]), content_type='application/atom+xml;type=entry', data=atom_dataset['entry-data0'] % deposit.external_id.encode('utf-8'), HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED assert deposit.status == DEPOSIT_STATUS_PARTIAL from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit.id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED): """Build deposit with a specific status """ @pytest.fixture() def _deposit(sample_archive, deposit_collection, authenticated_client, deposit_status=deposit_status): external_id = 'external-id-%s' % deposit_status return create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id=external_id, deposit_status=deposit_status ) return _deposit deposited_deposit = deposit_factory() rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED) partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL) +verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED) completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS) failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE) @pytest.fixture def partial_deposit_with_metadata( sample_archive, deposit_collection, authenticated_client, atom_dataset): """Returns deposit with archive and metadata provided, status 'partial' """ return create_binary_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id='external-id-partial', deposit_status=DEPOSIT_STATUS_PARTIAL, atom_dataset=atom_dataset ) @pytest.fixture def partial_deposit_only_metadata( deposit_collection, authenticated_client, atom_dataset): response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type='application/atom+xml;type=entry', data=atom_dataset['entry-data1'], HTTP_SLUG='external-id-partial', HTTP_IN_PROGRESS=True) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) deposit_id = response_content['deposit_id'] from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_PARTIAL return deposit @pytest.fixture def complete_deposit(sample_archive, deposit_collection, authenticated_client): """Returns a completed deposit (load success) """ deposit = create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id='external-id-complete', deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS ) _swh_id_context = 'https://hal.archives-ouvertes.fr/hal-01727745' deposit.swh_id = 'swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b' deposit.swh_id_context = '%s;%s' % ( deposit.swh_id, _swh_id_context) deposit.swh_anchor_id = \ 'swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' deposit.swh_anchor_id_context = '%s;%s' % ( deposit.swh_anchor_id, _swh_id_context) deposit.save() return deposit @pytest.fixture() def tmp_path(tmp_path): return str(tmp_path) # issue with oldstable's pytest version diff --git a/swh/deposit/tests/loader/common.py b/swh/deposit/tests/loader/common.py index 60eaeb65..d56fc928 100644 --- a/swh/deposit/tests/loader/common.py +++ b/swh/deposit/tests/loader/common.py @@ -1,54 +1,124 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json +from typing import Dict from swh.deposit.client import PrivateApiDepositClient +from swh.model.hashutil import hash_to_bytes, hash_to_hex CLIENT_TEST_CONFIG = { 'url': 'http://nowhere:9000/', 'auth': {}, # no authentication in test scenario } class SWHDepositTestClient(PrivateApiDepositClient): """Deposit test client to permit overriding the default request client. """ def __init__(self, client, config): super().__init__(config=config) self.client = client def archive_get(self, archive_update_url, archive_path, log=None): r = self.client.get(archive_update_url) with open(archive_path, 'wb') as f: for chunk in r.streaming_content: f.write(chunk) return archive_path def metadata_get(self, metadata_url, log=None): r = self.client.get(metadata_url) return json.loads(r.content.decode('utf-8')) def status_update(self, update_status_url, status, revision_id=None, directory_id=None, origin_url=None): payload = {'status': status} if revision_id: payload['revision_id'] = revision_id if directory_id: payload['directory_id'] = directory_id if origin_url: payload['origin_url'] = origin_url self.client.put(update_status_url, content_type='application/json', data=json.dumps(payload)) def check(self, check_url): r = self.client.get(check_url) data = json.loads(r.content.decode('utf-8')) return data['status'] + + +def get_stats(storage) -> Dict: + """Adaptation utils to unify the stats counters across storage + implementation. + + """ + storage.refresh_stat_counters() + stats = storage.stat_counters() + + keys = ['content', 'directory', 'origin', 'origin_visit', 'person', + 'release', 'revision', 'skipped_content', 'snapshot'] + return {k: stats.get(k) for k in keys} + + +def decode_target(target): + """Test helper to ease readability in test + + """ + if not target: + return target + target_type = target['target_type'] + + if target_type == 'alias': + decoded_target = target['target'].decode('utf-8') + else: + decoded_target = hash_to_hex(target['target']) + + return { + 'target': decoded_target, + 'target_type': target_type + } + + +def check_snapshot(expected_snapshot, storage): + """Check for snapshot match. + + Provide the hashes as hexadecimal, the conversion is done + within the method. + + Args: + expected_snapshot (dict): full snapshot with hex ids + storage (Storage): expected storage + + """ + expected_snapshot_id = expected_snapshot['id'] + expected_branches = expected_snapshot['branches'] + snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id)) + if snap is None: + # display known snapshots instead if possible + if hasattr(storage, '_snapshots'): # in-mem storage + from pprint import pprint + for snap_id, (_snap, _) in storage._snapshots.items(): + snapd = _snap.to_dict() + snapd['id'] = hash_to_hex(snapd['id']) + branches = { + branch.decode('utf-8'): decode_target(target) + for branch, target in snapd['branches'].items() + } + snapd['branches'] = branches + pprint(snapd) + raise AssertionError('Snapshot is not found') + + branches = { + branch.decode('utf-8'): decode_target(target) + for branch, target in snap['branches'].items() + } + assert expected_branches == branches diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py index ec35a8c1..98e4223e 100644 --- a/swh/deposit/tests/loader/conftest.py +++ b/swh/deposit/tests/loader/conftest.py @@ -1,36 +1,67 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import re import os import pytest import yaml +from functools import partial + +from swh.core.pytest_plugin import get_response_cb from swh.scheduler.tests.conftest import * # noqa +from swh.storage.tests.conftest import * # noqa from swh.deposit.loader.checker import DepositChecker +from swh.deposit.loader.loader import DepositLoader @pytest.fixture(scope='session') def celery_includes(): return [ 'swh.deposit.loader.tasks', ] @pytest.fixture -def swh_config(tmp_path, monkeypatch): +def swh_config(tmp_path, swh_storage_postgresql, monkeypatch): storage_config = { 'url': 'https://deposit.softwareheritage.org/', + 'storage': { + 'cls': 'local', + 'args': { + 'db': swh_storage_postgresql.dsn, + 'objstorage': { + 'cls': 'memory', + 'args': {} + }, + }, + }, } conffile = os.path.join(tmp_path, 'deposit.yml') with open(conffile, 'w') as f: f.write(yaml.dump(storage_config)) monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile) return conffile @pytest.fixture def deposit_checker(swh_config): return DepositChecker() + + +@pytest.fixture +def deposit_loader(swh_config): + return DepositLoader() + + +@pytest.fixture +def requests_mock_datadir(datadir, requests_mock_datadir): + """Override default behavior to deal with put method + + """ + cb = partial(get_response_cb, datadir=datadir) + requests_mock_datadir.put(re.compile('https://'), body=cb) + return requests_mock_datadir diff --git a/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta new file mode 100644 index 00000000..0451c714 --- /dev/null +++ b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta @@ -0,0 +1,69 @@ +{ + "branch_name": "master", + "origin": { + "type": "deposit", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id" + }, + "origin_metadata": { + "metadata": { + "@xmlns": ["http://www.w3.org/2005/Atom"], + "author": [ + "some awesome author", + "another one", + "no one" + ], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id" + }, + "provider": { + "metadata": {}, + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/" + }, + "tool": { + "configuration": {"sword_version": "2"}, + "name": "swh-deposit", + "version": "0.0.1" + } + }, + "revision": { + "author": { + "name": "Software Heritage", + "fullname": "Software Heritage", + "email": "robot@softwareheritage.org" + }, + "committer": { + "name": "Software Heritage", + "fullname": "Software Heritage", + "email": "robot@softwareheritage.org" + }, + "committer_date": { + "negative_utc": "false", + "offset": 0, + "timestamp": { + "microseconds": 0, + "seconds": 1507389428 + } + }, + "date": { + "negative_utc": "false", + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1507389428} + }, + "message": "test: Deposit 999 in collection test", + "metadata": { + "@xmlns": ["http://www.w3.org/2005/Atom"], + "author": ["some awesome author", + "another one", + "no one"], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id" + }, + "synthetic": "true", + "type": "tar", + "parents": [] + } +} diff --git a/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw new file mode 100644 index 00000000..cae6b33c Binary files /dev/null and b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw differ diff --git a/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update new file mode 100644 index 00000000..52c33a57 --- /dev/null +++ b/swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update @@ -0,0 +1 @@ +"ok" diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py index 323ba137..3c6924df 100644 --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -1,172 +1,108 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import os -import unittest -import shutil - -import pytest -from rest_framework.test import APITestCase - -from swh.model import hashutil -from swh.deposit.models import Deposit -from swh.deposit.loader import loader from swh.deposit.config import ( PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT ) from django.urls import reverse -from swh.loader.core.tests import BaseLoaderStorageTest - -from swh.deposit import utils - -from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG -from .. import TEST_LOADER_CONFIG -from ..common import (BasicTestCase, WithAuthTestCase, - CommonCreationRoutine, - FileSystemCreationRoutine) - - -class TestLoaderUtils(unittest.TestCase): - def assertRevisionsOk(self, expected_revisions): # noqa: N802 - """Check the loader's revisions match the expected revisions. - - Expects self.loader to be instantiated and ready to be - inspected (meaning the loading took place). - - Args: - expected_revisions (dict): Dict with key revision id, - value the targeted directory id. - - """ - # The last revision being the one used later to start back from - for rev in self.loader.state['revision']: - rev_id = hashutil.hash_to_hex(rev['id']) - directory_id = hashutil.hash_to_hex(rev['directory']) - - self.assertEqual(expected_revisions[rev_id], directory_id) - - -@pytest.mark.fs -class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase, - BasicTestCase, CommonCreationRoutine, - FileSystemCreationRoutine, TestLoaderUtils, - BaseLoaderStorageTest): - - def setUp(self): - super().setUp() - - # create the extraction dir used by the loader - os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) - - # Sets a basic client which accesses the test data - loader_client = SWHDepositTestClient(self.client, - config=CLIENT_TEST_CONFIG) - # Setup loader with that client - self.loader = loader.DepositLoader(client=loader_client) - - self.storage = self.loader.storage - - def tearDown(self): - super().tearDown() - shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir']) - - def test_inject_deposit_ready(self): - """Load a deposit which is ready - - """ - # create a deposit with archive and metadata - deposit_id = self.create_simple_binary_deposit() - self.update_binary_deposit(deposit_id, status_partial=False) - - args = [self.collection.name, deposit_id] - - archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) - deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) - deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) - - # when - res = self.loader.load(archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - # then - self.assertEqual(res['status'], 'eventful', res) - self.assertCountContents(1) - self.assertCountDirectories(1) - self.assertCountRevisions(1) - self.assertCountReleases(0) - self.assertCountSnapshots(1) - - def test_inject_deposit_verify_metadata(self): - """Load a deposit with metadata, test metadata integrity - - """ - deposit_id = self.create_simple_binary_deposit() - self.add_metadata_to_deposit(deposit_id, status_partial=False) - args = [self.collection.name, deposit_id] - - archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) - deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) - deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) - - # when - self.loader.load(archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - # then - self.assertCountContents(1) - self.assertCountDirectories(1) - self.assertCountRevisions(1) - self.assertCountReleases(0) - self.assertCountSnapshots(1) - - codemeta = 'codemeta:' - deposit = Deposit.objects.get(pk=deposit_id) - origin_url = utils.origin_url_from(deposit) - - expected_origin_metadata = { - '@xmlns': 'http://www.w3.org/2005/Atom', - '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', - 'author': { - 'email': 'hal@ccsd.cnrs.fr', - 'name': 'HAL' - }, - codemeta + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # same as xml # noqa - codemeta + 'runtimePlatform': 'phpstorm', - codemeta + 'license': [ - { - codemeta + 'name': 'GNU General Public License v3.0 only' - }, +from swh.model.hashutil import hash_to_bytes + +from .common import get_stats, check_snapshot + + +def test_inject_deposit_ready( + swh_config, requests_mock_datadir, datadir, deposit_loader): + """Load a deposit which is ready + + """ + args = ['test', 999] + archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) + deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) + deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) + + # when + res = deposit_loader.load( + archive_url=archive_url, + deposit_meta_url=deposit_meta_url, + deposit_update_url=deposit_update_url) + + # then + assert res['status'] == 'eventful' + stats = get_stats(deposit_loader.storage) + + assert { + 'content': 303, + 'skipped_content': 0, + 'directory': 12, + 'origin': 1, + 'origin_visit': 1, + 'person': 1, + 'release': 0, + 'revision': 1, + 'snapshot': 1, + } == stats + + origin_url = 'https://hal-test.archives-ouvertes.fr/some-external-id' + rev_id = 'b1bef04d90ef3ba645df4c4f945748c173a4e9a2' + dir_id = 'bed9acbf2a4502499f659e65a2ab77096bd46a1d' + + expected_revision = { + 'author': { + 'name': b'Software Heritage', + 'fullname': b'Software Heritage', + 'email': b'robot@softwareheritage.org'}, + 'committer': { + 'name': b'Software Heritage', + 'fullname': b'Software Heritage', + 'email': b'robot@softwareheritage.org'}, + 'committer_date': { + 'negative_utc': 'false', + 'offset': 0, + 'timestamp': {'microseconds': 0, 'seconds': 1507389428}}, + 'date': { + 'negative_utc': 'false', + 'offset': 0, + 'timestamp': {'microseconds': 0, 'seconds': 1507389428}}, + 'message': b'test: Deposit 999 in collection test', + 'metadata': { + '@xmlns': ['http://www.w3.org/2005/Atom'], + 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', + 'external_identifier': 'some-external-id', + 'url': origin_url, + 'original_artifact': [ { - codemeta + 'name': 'CeCILL Free Software License Agreement v1.1' # noqa + 'name': 'archive.zip', + 'archive_type': 'tar', + 'length': 725946, + 'blake2s256': '04fffd328441d216c92492ad72d37388d8c77889880b069151298786fd48d889', # noqa + 'sha256': '31e066137a962676e89f69d1b65382de95a7ef7d914b8cb956f41ea72e0f516b', # noqa + 'sha1': 'f7bebf6f9c62a2295e889f66e05ce9bfaed9ace3', + 'sha1_git': 'cae6b33cc33faafd2d6bd86c6b4273f9338c69c2' } - ], - codemeta + 'author': { - codemeta + 'name': 'Morane Gruenpeter' - }, - codemeta + 'programmingLanguage': ['php', 'python', 'C'], - codemeta + 'applicationCategory': 'test', - codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00', - codemeta + 'version': '1', - 'external_identifier': 'hal-01243065', - 'title': 'Composing a Web of Audio Applications', - codemeta + 'description': 'this is the description', - 'id': 'hal-01243065', - 'client': 'hal', - codemeta + 'keywords': 'DSP programming,Web', - codemeta + 'developmentStatus': 'stable' + ] + }, + 'synthetic': True, + 'type': 'tar', + 'parents': [], + 'directory': hash_to_bytes(dir_id), + 'id': hash_to_bytes(rev_id), + } + + rev = next(deposit_loader.storage.revision_get([hash_to_bytes(rev_id)])) + assert rev is not None + assert expected_revision == rev + + expected_snapshot = { + 'id': '823109c16f9948c6f88cc5dec8e278da1487f06d', + 'branches': { + 'master': { + 'target': rev_id, + 'target_type': 'revision' + } } - self.assertOriginMetadataContains('deposit', origin_url, - expected_origin_metadata) + } - self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') - self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( - deposit.swh_id, origin_url - )) - self.assertRegex(deposit.swh_anchor_id, r'^swh:1:rev:.*') - self.assertEqual(deposit.swh_anchor_id_context, '%s;origin=%s' % ( - deposit.swh_anchor_id, origin_url - )) + check_snapshot(expected_snapshot, deposit_loader.storage)