diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 4a9aaaa8..954a3dfe 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,96 +1,103 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict, List, Tuple + from rest_framework.permissions import AllowAny from swh.deposit import utils from swh.deposit.api.common import AuthenticatedAPIView from swh.deposit.errors import NOT_FOUND, make_error_dict from ...config import METADATA_TYPE, APIConfig from ...models import Deposit, DepositRequest class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit ).order_by("id") for deposit_request in deposit_requests: yield deposit_request - def _metadata_get(self, deposit): - """Given a deposit, aggregate all metadata requests. + def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]: + """Given a deposit, retrieve all metadata requests into one Dict and return both that + aggregated metadata dict and the list of raw_metadata. Args: - deposit (Deposit): The deposit instance to extract - metadata from. + deposit: The deposit instance to extract metadata from Returns: - metadata dict from the deposit. 
+ Tuple of aggregated metadata dict, list of raw_metadata """ - metadata = ( - m.metadata - for m in self._deposit_requests(deposit, request_type=METADATA_TYPE) - ) - return utils.merge(*metadata) + metadata: List[Dict[str, Any]] = [] + raw_metadata: List[str] = [] + for deposit_request in self._deposit_requests( + deposit, request_type=METADATA_TYPE + ): + metadata.append(deposit_request.metadata) + raw_metadata.append(deposit_request.raw_metadata) + + aggregated_metadata = utils.merge(*metadata) + return (aggregated_metadata, raw_metadata) class APIPrivateView(APIConfig, AuthenticatedAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny,) def checks(self, req, collection_name, deposit_id=None): """Override default checks implementation to allow empty collection. """ if deposit_id: try: Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, "Deposit with id %s does not exist" % deposit_id ) headers = self._read_headers(req) checks = self.additional_checks(req, headers, collection_name, deposit_id) if "error" in checks: return checks return {"headers": headers} def get( self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): return super().get(request, collection_name, deposit_id) def put( self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): return super().put(request, collection_name, deposit_id) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index e5172e07..bf6ed542 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,187 +1,188 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more 
information from itertools import chain import re from shutil import get_unpack_formats import tarfile from typing import Dict, Optional, Tuple import zipfile from rest_framework import status +from rest_framework.request import Request from swh.scheduler.utils import create_oneshot_task_dict from . import APIPrivateView, DepositReadMixin from ...config import ARCHIVE_TYPE, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_VERIFIED from ...models import Deposit, DepositRequest from ..checks import check_metadata from ..common import APIGet MANDATORY_ARCHIVE_UNREADABLE = ( "At least one of its associated archives is not readable" # noqa ) MANDATORY_ARCHIVE_INVALID = ( "Mandatory archive is invalid (i.e contains only one archive)" # noqa ) MANDATORY_ARCHIVE_UNSUPPORTED = "Mandatory archive type is not supported" MANDATORY_ARCHIVE_MISSING = "Deposit without archive is rejected" ARCHIVE_EXTENSIONS = [ "zip", "tar", "tar.gz", "xz", "tar.xz", "bz2", "tar.bz2", "Z", "tar.Z", "tgz", "7z", ] PATTERN_ARCHIVE_EXTENSION = re.compile(r".*\.(%s)$" % "|".join(ARCHIVE_EXTENSIONS)) def known_archive_format(filename): return any( filename.endswith(t) for t in chain(*(x[1] for x in get_unpack_formats())) ) class APIChecks(APIPrivateView, APIGet, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _check_deposit_archives(self, deposit: Deposit) -> Tuple[bool, Optional[Dict]]: """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. 
""" requests = list(self._deposit_requests(deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, {"archive": [{"summary": MANDATORY_ARCHIVE_MISSING,}]} errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: errors.append( {"summary": error_message, "fields": [archive_request.id]} ) if not errors: return True, None return False, {"archive": errors} def _check_archive( self, archive_request: DepositRequest ) -> Tuple[bool, Optional[str]]: """Check that a deposit associated archive is ok: - readable - supported archive format - valid content: the archive does not contain a single archive file If any of those checks are not ok, return the corresponding failing check. Args: archive_path (DepositRequest): Archive to check Returns: (True, None) if archive is check compliant, (False, ) otherwise. """ archive_path = archive_request.archive.path if not known_archive_format(archive_path): return False, MANDATORY_ARCHIVE_UNSUPPORTED try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as zipfile_: files = zipfile_.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as tarfile_: files = tarfile_.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] if PATTERN_ARCHIVE_EXTENSION.match(element): # archive in archive! return False, MANDATORY_ARCHIVE_INVALID return True, None def process_get( - self, req, collection_name: str, deposit_id: int + self, req: Request, collection_name: str, deposit_id: int ) -> Tuple[int, Dict, str]: """Build a unique tarball from the multiple received and stream that content to the client. 
Args: - req (Request): - collection_name (str): Collection owning the deposit - deposit_id (id): Deposit concerned by the reading + req: Client request + collection_name: Collection owning the deposit + deposit_id: Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) - metadata = self._metadata_get(deposit) + metadata, _ = self._metadata_get(deposit) problems: Dict = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: assert error_detail is not None problems.update(error_detail) metadata_status, error_detail = check_metadata(metadata) if not metadata_status: assert error_detail is not None problems.update(error_detail) deposit_status = archives_status and metadata_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { "status": deposit.status, "details": deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { "status": deposit.status, } if not deposit.load_task_id and self.config["checks"]: url = deposit.origin_url task = create_oneshot_task_dict( "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 ) load_task_id = self.scheduler.create_tasks([task])[0]["id"] deposit.load_task_id = load_task_id deposit.save() return status.HTTP_200_OK, response, "application/json" diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 2a5e58aa..c783d5f5 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,188 +1,207 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # 
See top-level LICENSE file for more information from contextlib import contextmanager import os import shutil import tempfile from typing import Any, Dict, Tuple from rest_framework import status from swh.core import tarball from swh.deposit.utils import normalize_date from swh.model import identifiers from swh.model.model import MetadataAuthorityType from . import APIPrivateView, DepositReadMixin from ...config import ARCHIVE_TYPE, SWH_PERSON from ...models import Deposit from ..common import APIGet @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ # rebuild one zip archive from (possibly) multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate") os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = shutil.make_archive( aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir ) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) class APIReadArchives(APIPrivateView, APIGet, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. 
""" def __init__(self): super().__init__() self.extraction_dir = self.config["extraction_dir"] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def process_get( self, request, collection_name: str, deposit_id: int ) -> Tuple[int, Any, str]: """Build a unique tarball from the multiple received and stream that content to the client. Args: request (Request): collection_name: Collection owning the deposit deposit_id: Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = [ r.archive.path for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE) ] return ( status.HTTP_200_OK, aggregate_tarballs(self.extraction_dir, archive_paths), "swh/generator", ) class APIReadMetadata(APIPrivateView, APIGet, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. Args: deposit (Deposit): Deposit model representation metadata (Dict): Metadata dict representation Returns: Tuple of author date, committer date. Those dates are swh normalized. """ commit_date = metadata.get("codemeta:datePublished") author_date = metadata.get("codemeta:dateCreated") if author_date and commit_date: pass elif commit_date: author_date = commit_date elif author_date: commit_date = author_date else: author_date = deposit.complete_date commit_date = deposit.complete_date return (normalize_date(author_date), normalize_date(commit_date)) - def metadata_read(self, deposit): - """Read and aggregate multiple data on deposit into one unified data - dictionary. + def metadata_read(self, deposit: Deposit) -> Dict[str, Any]: + """Read and aggregate multiple deposit information into one unified dictionary. Args: - deposit (Deposit): Deposit concerned by the data aggregation. + deposit: Deposit concerned by the data aggregation. 
Returns: - Dictionary of data representing the deposit to inject in swh. + Dictionary of deposit information read by the deposit loader, with the + following keys: + + **origin** (Dict): Information about the origin + + **origin_metadata** (Dict): Metadata about the origin to load + + **metadata_raw** (List[str]): List of raw metadata received for the + deposit + + **metadata_dict** (Dict): Deposit aggregated metadata into one dict + + **provider** (Dict): the metadata provider information about the + deposit client + + **tool** (Dict): the deposit information + + **deposit** (Dict): deposit information relevant to build the revision + (author_date, committer_date, etc...) """ - metadata = self._metadata_get(deposit) + metadata, raw_metadata = self._metadata_get(deposit) # Read information metadata data = {"origin": {"type": "deposit", "url": deposit.origin_url,}} author_date, commit_date = self._normalize_dates(deposit, metadata) if deposit.parent: - swh_persistent_id = deposit.parent.swhid - swhid = identifiers.parse_swhid(swh_persistent_id) + parent_swhid = deposit.parent.swhid + assert parent_swhid is not None + swhid = identifiers.parse_swhid(parent_swhid) parent_revision = swhid.object_id parents = [parent_revision] else: parents = [] data["origin_metadata"] = { # metadata provider "provider": { "provider_name": deposit.client.last_name, "provider_url": deposit.client.provider_url, "provider_type": MetadataAuthorityType.DEPOSIT_CLIENT.value, "metadata": {}, }, "tool": self.tool, - "metadata": metadata, + "metadata_raw": raw_metadata, + "metadata_dict": metadata, } data["deposit"] = { "id": deposit.id, "client": deposit.client.username, "collection": deposit.collection.name, "author": SWH_PERSON, "author_date": author_date, "committer": SWH_PERSON, "committer_date": commit_date, "revision_parents": parents, } return data def process_get( self, request, collection_name: str, deposit_id: int ) -> Tuple[int, Dict, str]: deposit = Deposit.objects.get(pk=deposit_id) 
data = self.metadata_read(deposit) return status.HTTP_200_OK, data if data else {}, "application/json" diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py index 8c745d94..ba5d2a87 100644 --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -1,543 +1,424 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status -from swh.deposit import __version__ +from swh.deposit import __version__, utils from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON from swh.deposit.models import Deposit +from swh.deposit.parsers import parse_xml PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc" def private_get_raw_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" deposit_id = deposit if isinstance(deposit, int) else deposit.id return [ reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection.name, deposit_id]), reverse(PRIVATE_GET_DEPOSIT_METADATA_NC, args=[deposit_id]), ] -def update_deposit(authenticated_client, collection, deposit, atom_dataset): - for atom_data in ["entry-data2", "entry-data3"]: - update_deposit_with_metadata( - authenticated_client, collection, deposit, atom_dataset[atom_data] - ) - return deposit - - def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata): # update deposit's metadata response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[collection.name, deposit.id]), content_type="application/atom+xml;type=entry", data=metadata, HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS=True, ) assert 
response.status_code == status.HTTP_201_CREATED return deposit def test_read_metadata( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Private metadata read api to existing deposit should return metadata """ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + + metadata_xml_atoms = [ + atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] + ] + metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] + for atom_xml in metadata_xml_atoms: + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - - expected_meta = { + assert data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "origin_metadata": { - "metadata": { - "author": ["some awesome author", "another one", "no one"], - "codemeta:dateCreated": "2017-10-07T15:17:08Z", - "external_identifier": "some-external-id", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa - }, + "metadata_raw": metadata_xml_atoms, + "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": 
{"microseconds": 0, "seconds": 1507389428}, }, "client": "test", "id": deposit.id, "collection": "test", "revision_parents": [], }, } - assert data == expected_meta - def test_read_metadata_revision_with_parent( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Private read metadata to a deposit (with parent) returns metadata """ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + metadata_xml_atoms = [ + atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] + ] + metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] + for atom_xml in metadata_xml_atoms: + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml, + ) + rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" swhid = "swh:1:rev:%s" % rev_id fake_parent = Deposit( swhid=swhid, client=deposit.client, collection=deposit.collection ) fake_parent.save() deposit.parent = fake_parent deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - - expected_meta = { + assert data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "origin_metadata": { - "metadata": { - "author": ["some awesome author", "another one", "no one"], - "codemeta:dateCreated": "2017-10-07T15:17:08Z", - "external_identifier": "some-external-id", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa - }, + "metadata_raw": metadata_xml_atoms, + "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, 
"tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": "test", "id": deposit.id, "collection": "test", "revision_parents": [rev_id], }, } - assert data == expected_meta - def test_read_metadata_3( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """date(Created|Published) provided, uses author/committer date """ deposit = partial_deposit deposit.external_id = "hal-01243065" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = ( atom_dataset["metadata"] % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 """ ) - update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, codemeta_entry_data - ) + metadata_xml_atoms = [ + atom_dataset["entry-data2"], + atom_dataset["entry-data3"], + codemeta_entry_data, + ] + metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] + for atom_xml in metadata_xml_atoms: + update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - - metadata = { - "author": [ - "some awesome author", - "another one", - "no one", - {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - ], - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - 
"codemeta:dateCreated": [ - "2017-10-07T15:17:08Z", - "2015-04-06T17:08:47+02:00", - ], - "codemeta:datePublished": "2017-05-03T16:08:47+02:00", - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - {"codemeta:name": "GNU General Public License v3.0 only"}, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa - "codemeta:version": "1", - "external_identifier": ["some-external-id", "hal-01243065"], - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", - } - expected_meta = { + assert data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, "origin_metadata": { - "metadata": metadata, + "metadata_raw": metadata_xml_atoms, + "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1493820527}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } - assert data == expected_meta def test_read_metadata_4( authenticated_client, deposit_collection, atom_dataset, partial_deposit ): """dateCreated/datePublished not provided, 
revision uses complete_date """ deposit = partial_deposit codemeta_entry_data = atom_dataset["metadata"] % "" deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, codemeta_entry_data ) # will use the deposit completed date as fallback date deposit.complete_date = "2016-04-06" deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - metadata = { - "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - { - "codemeta:name": "GNU " - "General " - "Public " - "License " - "v3.0 " - "only" - }, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", - "codemeta:version": "1", - "external_identifier": "hal-01243065", - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - } - expected_origin = { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id), } expected_origin_metadata = { - "metadata": metadata, + "metadata_raw": [codemeta_entry_data], + "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, } expected_deposit_info = { 
"author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1459900800}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1459900800}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], } expected_meta = { "origin": expected_origin, "origin_metadata": expected_origin_metadata, "deposit": expected_deposit_info, } assert data == expected_meta def test_read_metadata_5( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """dateCreated/datePublished provided, revision uses author/committer date If multiple dateCreated provided, the first occurrence (of dateCreated) is selected. If multiple datePublished provided, the first occurrence (of datePublished) is selected. """ deposit = partial_deposit # add metadata to the deposit with multiple datePublished/dateCreated codemeta_entry_data = ( atom_dataset["metadata"] % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 2016-04-06T17:08:47+02:00 2018-05-03T16:08:47+02:00 """ ) deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, codemeta_entry_data ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() expected_origin = { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/external-id-partial", } - metadata = { - "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:dateCreated": [ - "2015-04-06T17:08:47+02:00", - "2016-04-06T17:08:47+02:00", - ], - "codemeta:datePublished": [ - "2017-05-03T16:08:47+02:00", - 
"2018-05-03T16:08:47+02:00", - ], - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - { - "codemeta:name": "GNU " - "General " - "Public " - "License " - "v3.0 " - "only" - }, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa - "codemeta:version": "1", - "external_identifier": "hal-01243065", - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - } - expected_origin_metadata = { - "metadata": metadata, + "metadata_raw": [codemeta_entry_data], + "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, } expected_deposit_info = { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1493820527}, }, "author_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1428332927}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], } expected_meta = { "origin": expected_origin, "origin_metadata": expected_origin_metadata, "deposit": expected_deposit_info, } assert data == expected_meta def test_access_to_nonexisting_deposit_returns_404_response( authenticated_client, deposit_collection, ): """Read unknown collection should return a 404 response """ unknown_id = 999 try: Deposit.objects.get(pk=unknown_id) except Deposit.DoesNotExist: assert True for url in 
private_get_raw_url_endpoints(deposit_collection, unknown_id): response = authenticated_client.get(url) assert response.status_code == status.HTTP_404_NOT_FOUND msg = "Deposit with id %s does not exist" % unknown_id assert msg in response.content.decode("utf-8")