def additional_checks(
    self, request, headers: ParsedRequestHeaders, collection_name, deposit=None
):
    """Validate the status update payload on top of the default checks.

    The following conditions are enforced, in this order:

    - a non-empty "status" key is present in the request data
    - that status is one of the keys of DEPOSIT_STATUS_DETAIL
    - when the status is DEPOSIT_STATUS_LOAD_SUCCESS, every key of
      MANDATORY_KEYS has a non-None value in the request data

    Raises:
        DepositError (BAD_REQUEST) as soon as one condition is not met

    Returns:
        An empty dict when every check passes

    """
    payload = request.data
    status = payload.get("status")

    if not status:
        raise DepositError(
            BAD_REQUEST,
            "The status key is mandatory with possible values %s"
            % list(DEPOSIT_STATUS_DETAIL.keys()),
        )

    if status not in DEPOSIT_STATUS_DETAIL:
        raise DepositError(
            BAD_REQUEST, "Possible status in %s" % list(DEPOSIT_STATUS_DETAIL.keys())
        )

    # Only a successful load requires the identifiers to be provided
    if status != DEPOSIT_STATUS_LOAD_SUCCESS:
        return {}

    absent = [name for name in MANDATORY_KEYS if payload.get(name) is None]
    if absent:
        raise DepositError(
            BAD_REQUEST,
            f"Updating deposit status to {status}"
            f" requires information {','.join(absent)}",
        )

    return {}
def compute_unified_information(
    collection: str,
    in_progress: bool,
    slug: str,
    *,
    filepath: Optional[str] = None,
    swhid: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Given a filepath, compute necessary information on that file.

    Args:
        collection: Deposit collection (accepted so that callers can forward
          their positional arguments as is; not part of the result)
        in_progress: do we finalize the deposit?
        slug: external id to use
        filepath: Path to the file to compute the necessary information out of
        swhid: Deposit swhid if any
        **kwargs: extra keyword arguments, ignored

    Returns:
        dict with keys:

            'slug': external id to use
            'in_progress': do we finalize the deposit?
            'swhid': deposit swhid

        and, only when a filepath is provided:

            'content-type': content type associated
            'md5sum': md5 sum
            'filename': filename
            'filepath': filepath

    Raises:
        OSError: if filepath is provided but cannot be read

    """
    result: Dict[str, Any] = {
        "slug": slug,
        "in_progress": in_progress,
        "swhid": swhid,
    }
    if not filepath:
        return result

    filename = os.path.basename(filepath)

    # Hash chunk by chunk within a context manager: the file handle is always
    # closed and the archive never has to fit in memory as a whole.
    digest = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)

    # Anything whose last extension does not mention "zip" is sent as a tarball
    extension = filename.split(".")[-1]
    content_type = "application/zip" if "zip" in extension else "application/x-tar"

    result.update(
        {
            "content-type": content_type,
            "md5sum": digest.hexdigest(),
            "filename": filename,
            "filepath": filepath,
        }
    )
    return result
class PrivateApiDepositClient(BaseApiDepositClient):
    """Private API deposit client to:

    - read a given deposit's archive(s)
    - read a given deposit's metadata
    - update a given deposit's status
    - check a given deposit

    """

    def archive_get(self, archive_update_url: str, archive: str) -> Optional[str]:
        """Retrieve the archive from the deposit to a local directory.

        Args:
            archive_update_url (str): The full deposit archive(s)'s raw content
                               to retrieve locally

            archive (str): the local archive's path where to store
            the raw content

        Returns:
            The archive path to the local archive to load.

        Raises:
            ValueError: if the response is not ok

        """
        response = self.do("get", archive_update_url, stream=True)
        if response.ok:
            with open(archive, "wb") as f:
                # iter_content() defaults to 1-byte chunks; ask for larger
                # ones so the archive is not written one byte at a time.
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)

            return archive

        msg = "Problem when retrieving deposit archive at %s" % (archive_update_url,)
        logger.error(msg)

        raise ValueError(msg)

    def metadata_get(self, metadata_url):
        """Retrieve the metadata information on a given deposit.

        Args:
            metadata_url (str): The full deposit metadata url to retrieve
            locally

        Returns:
            The dictionary of metadata for that deposit (decoded json body).

        Raises:
            ValueError: if the response is not ok

        """
        r = self.do("get", metadata_url)
        if r.ok:
            return r.json()

        msg = "Problem when retrieving metadata at %s" % metadata_url
        logger.error(msg)

        raise ValueError(msg)

    def status_update(
        self,
        update_status_url,
        status,
        status_detail=None,
        release_id=None,
        directory_id=None,
        origin_url=None,
        snapshot_id=None,
    ):
        """Update the deposit's status.

        Only the optional values that are provided (truthy) are sent. The
        response of the PUT request is not inspected.

        Args:
            update_status_url (str): the full deposit's archive
            status (str): The status to update the deposit with
            status_detail (str/None): detail about the status, if any
            release_id (str/None): the release's identifier to update to
            directory_id (str/None): the directory's identifier to update to
            origin_url (str/None): deposit's associated origin url
            snapshot_id (str/None): the snapshot's identifier to update to;
              the private update endpoint lists it among its mandatory keys
              for a load success status

        """
        payload = {"status": status}
        if release_id:
            payload["release_id"] = release_id
        if directory_id:
            payload["directory_id"] = directory_id
        if snapshot_id:
            payload["snapshot_id"] = snapshot_id
        if origin_url:
            payload["origin_url"] = origin_url
        if status_detail:
            payload["status_detail"] = status_detail

        self.do("put", update_status_url, json=payload)

    def check(self, check_url):
        """Check the deposit's associated data (metadata, archive(s))

        Args:
            check_url (str): the full deposit's check url

        Returns:
            The "status" value of the json response.

        Raises:
            ValueError: if the response is not ok

        """
        r = self.do("get", check_url)
        if r.ok:
            data = r.json()
            return data["status"]

        msg = "Problem when checking deposit %s" % check_url
        logger.error(msg)

        raise ValueError(msg)
def execute(self, *args, **kwargs) -> Dict[str, Any]:
    """Main endpoint to prepare and execute the http query to the api.

    The url, http method, extra information and query parameters are
    computed out of the arguments through the compute_* hooks, then the
    query is executed through do_execute.

    Raises:
        MaintenanceError if some api maintenance is happening (503 response
        providing both a summary and a verbose description).

    Returns:
        Dict of computed api data:

        - when the query itself raised: a copy of the empty result with an
          extra "error" key
        - on a 204 response: {"status": 204}
        - on any other ok response: the output of parse_result_ok
        - on an error response: the output of parse_result_error, completed
          with the empty result and the "status" code

    """
    url = self.compute_url(*args, **kwargs)
    method = self.compute_method(*args, **kwargs)
    info = self.compute_information(*args, **kwargs)
    params = self.compute_params(**kwargs)

    try:
        response = self.do_execute(method, url, info, params=params)
    except Exception as e:
        # Work on a copy: updating self.empty_result in place would leak this
        # error message into every later result built out of it.
        result = dict(self.empty_result)
        result["error"] = self.error_msg % (url, e)
        return result

    if response.ok:
        if int(response.status_code) == 204:  # 204 returns no body
            return {"status": response.status_code}
        headers = dict(response.headers) if response.headers else None
        return self.parse_result_ok(response.text, headers)

    error = self.parse_result_error(response.text)
    error.update(self.empty_result)
    if response.status_code == 503:
        summary = error.get("summary")
        detail = error.get("sword:verboseDescription")
        # Maintenance error
        if summary and detail:
            raise MaintenanceError(f"{summary}: {detail}")
    error["status"] = response.status_code
    return error
class CollectionListDepositClient(BaseDepositClient):
    """Client listing the deposits of a collection (owned by a user)."""

    def __init__(self, config=None, url=None, auth=None):
        super().__init__(
            url=url,
            auth=auth,
            config=config,
            error_msg="List deposits failure at %s: %s",
            empty_result={},
        )

    def compute_url(self, collection, **kwargs):
        """The listing endpoint is the collection url itself."""
        return f"/{collection}/"

    def compute_method(self, *args, **kwargs):
        return "get"

    def compute_params(self, **kwargs) -> Dict[str, Any]:
        """Keep the keyword arguments (e.g. page, page_size) whose value is
        not None; they are transmitted as query parameters.

        """
        params: Dict[str, Any] = {}
        for name, value in kwargs.items():
            if value is not None:
                params[name] = value
        return params

    def parse_result_ok(
        self, xml_content: str, headers: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Parse the atom feed of a successful listing response.

        Returns:
            dict with the "count" of deposits, the list of "deposits" (each
            one restricted to the known keys actually present in its entry)
            and one key per relation found in the "Link" header, mapped to
            its url.

        """
        link_header = ""
        if headers:
            link_header = headers.get("Link", "")
        links = parse_header_links(link_header)

        feed = parse_xml(xml_content)["atom:feed"]
        count = feed.get("swh:count", 0)
        wanted = (
            "id",
            "reception_date",
            "complete_date",
            "external_id",
            "swhid",
            "status",
            "status_detail",
            "swhid_context",
            "origin_url",
        )

        # A feed holding a single entry is parsed as a dict, not a list
        raw_entries = feed.get("atom:entry", [])
        if isinstance(raw_entries, dict):
            raw_entries = [raw_entries]

        deposits = []
        for entry in raw_entries:
            deposit = {}
            for name in wanted:
                value = entry.get(f"swh:{name}")
                if value is not None:
                    deposit[name] = value
            deposits.append(deposit)

        result: Dict[str, Any] = {"count": count, "deposits": deposits}
        for link in links:
            result[link["rel"]] = link["url"]
        return result
class CreateArchiveDepositClient(BaseCreateDepositClient):
    """Client posting a binary archive as a deposit."""

    def compute_headers(self, info):
        """Build the http headers of the archive upload out of info.

        The "SLUG" header is only set when info holds a "slug" key.
        """
        headers = {}
        headers["CONTENT_MD5"] = info["md5sum"]
        headers["IN-PROGRESS"] = str(info["in_progress"])
        headers["CONTENT-TYPE"] = info["content-type"]
        headers["CONTENT-DISPOSITION"] = "attachment; filename=" + str(
            info["filename"]
        )
        if "slug" in info:
            headers["SLUG"] = info["slug"]
        return headers

    def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
        """Compute the archive information (out of the "archive_path" keyword
        argument) and attach the matching http headers to it.
        """
        archive_path = kwargs["archive_path"]
        info = compute_unified_information(*args, filepath=archive_path, **kwargs)
        info["headers"] = self.compute_headers(info)
        return info
class UpdateMetadataOnDoneDepositClient(CreateMetadataDepositClient):
    """Client updating the metadata of a "done" deposit (the deposit swhid is
    required)."""

    def compute_url(self, collection, *args, deposit_id=None, **kwargs):
        """Url of the atom endpoint of the deposit within its collection."""
        return "/{}/{}/atom/".format(collection, deposit_id)

    def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
        """Atom entry content type, plus the swhid read from info."""
        headers: Dict[str, Any] = {"CONTENT-TYPE": "application/atom+xml;type=entry"}
        headers["X_CHECK_SWHID"] = info["swhid"]
        return headers

    def compute_method(self, *args, **kwargs) -> str:
        """Such an update is always a PUT."""
        return "put"
class CreateMultipartDepositClient(BaseCreateDepositClient):
    """Create a multipart deposit client."""

    def _multipart_info(self, info, info_meta):
        """Build the multipart "files" and the headers of the request.

        Args:
            info: information on the archive (filename, filepath, content-type,
              md5sum, in_progress and optionally slug)
            info_meta: information on the metadata file (filename, filepath)

        Returns:
            (files, headers) tuple. files holds two open binary file objects;
            closing them is up to the caller (do_execute does it).

        """
        archive_file = open(info["filepath"], "rb")
        try:
            metadata_file = open(info_meta["filepath"], "rb")
        except Exception:
            # Do not leak the first handle when the second file cannot be opened
            archive_file.close()
            raise

        files = [
            ("file", (info["filename"], archive_file, info["content-type"])),
            ("atom", (info_meta["filename"], metadata_file, "application/atom+xml")),
        ]

        headers = {
            "CONTENT_MD5": info["md5sum"],
            "IN-PROGRESS": str(info["in_progress"]),
        }
        if "slug" in info:
            headers["SLUG"] = info["slug"]

        return files, headers

    def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
        """Compute the files and headers to send, out of the "archive_path"
        and "metadata_path" keyword arguments.
        """
        info = compute_unified_information(*args, filepath=kwargs["archive_path"],)
        info_meta = compute_unified_information(
            *args, filepath=kwargs["metadata_path"],
        )
        files, headers = self._multipart_info(info, info_meta)
        return {"files": files, "headers": headers}

    def do_execute(self, method, url, info, **kwargs):
        """Send the multipart request, then close the uploaded files.

        Extra keyword arguments are ignored.
        """
        try:
            return self.do(method, url, files=info["files"], headers=info["headers"])
        finally:
            # The file objects were opened by _multipart_info for this request
            # only; release them whatever the outcome of the request.
            for _, (_, fileobj, _) in info["files"]:
                fileobj.close()
def deposit_update(
    self,
    collection: str,
    deposit_id: int,
    slug: Optional[str],
    archive: Optional[str] = None,
    metadata: Optional[str] = None,
    in_progress: bool = False,
    replace: bool = False,
    swhid: Optional[str] = None,
):
    """Update (add/replace) existing deposit (archive, metadata, both).

    The current status of the deposit is read first: without swhid, the
    deposit must be "partial"; with a swhid, it must be "done". Otherwise
    (or when reading the status failed) an error dict is returned and
    nothing is sent.

    Returns:
        The error dict of the failing step if any, otherwise the status of
        the deposit read again after the update.

    """
    current = self.deposit_status(collection, deposit_id)
    if "error" in current:
        return current

    status = current["deposit_status"]
    if swhid is None:
        required_status = "partial"
        reason = "You can only act on deposit with status 'partial'"
    else:
        required_status = "done"
        reason = "You can only update metadata on deposit with status 'done'"
    if status != required_status:
        return {
            "error": reason,
            "detail": f"The deposit {deposit_id} has status '{status}'",
            "deposit_status": status,
            "deposit_id": deposit_id,
        }

    # Pick the client and its keyword arguments out of what is provided
    has_archive, has_metadata = bool(archive), bool(metadata)
    options = {"deposit_id": deposit_id}
    if has_archive and not has_metadata:
        client_class = UpdateArchiveDepositClient
        options.update(archive_path=archive, replace=replace)
    elif has_metadata and not has_archive:
        if swhid is None:
            client_class = UpdateMetadataOnPartialDepositClient
            options.update(metadata_path=metadata, replace=replace)
        else:
            client_class = UpdateMetadataOnDoneDepositClient
            options.update(metadata_path=metadata, swhid=swhid)
    else:
        client_class = UpdateMultipartDepositClient
        options.update(archive_path=archive, metadata_path=metadata, replace=replace)

    client = client_class(url=self.base_url, auth=self.auth)
    outcome = client.execute(collection, in_progress, slug, **options)

    if "error" in outcome:
        return outcome
    return self.deposit_status(collection, deposit_id)
def setup_django_for(platform=None, config_file=None):
    """Initialize django for command line tools (e.g. swh.deposit.create_user)
    so that they get the db access they need.

    Note:
        Do not import any django related module prior to this function
        call. Otherwise, this will raise an
        django.core.exceptions.ImproperlyConfigured error message.

    Args:
        platform (str): the platform the scheduling is running
        config_file (str): Extra configuration file (typically for the
                           production platform)

    Raises:
        ValueError in case of wrong platform inputs.

    """
    if platform is not None:
        if platform not in AUTHORIZED_PLATFORMS:
            raise ValueError("Platform should be one of %s" % AUTHORIZED_PLATFORMS)
        # An already exported settings module takes precedence
        os.environ.setdefault(
            "DJANGO_SETTINGS_MODULE", "swh.deposit.settings.%s" % platform
        )

    # Likewise, an already exported configuration filename is left untouched
    if config_file and "SWH_CONFIG_FILENAME" not in os.environ:
        os.environ["SWH_CONFIG_FILENAME"] = config_file

    import django

    django.setup()
def private_check_url_endpoints(collection, deposit):
    """Return the 2 private update endpoints of the deposit to exercise: the
    one with the collection name, then the one without."""
    with_collection = reverse(PRIVATE_PUT_DEPOSIT, args=[collection.name, deposit.id])
    without_collection = reverse(PRIVATE_PUT_DEPOSIT_NC, args=[deposit.id])
    return [with_collection, without_collection]
ready_deposit_verified ): """Update deposit with load success should require all information to succeed """ deposit = ready_deposit_verified expected_status = DEPOSIT_STATUS_LOAD_SUCCESS status_detail = "it works!" origin_url = "something" directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b" - revision_id = "47dc6b4636c7f6cba0df83e3d5490bf4334d987e" + release_id = "47dc6b4636c7f6cba0df83e3d5490bf4334d987e" snapshot_id = "68c0d26104d47e278dd6be07ed61fafb561d0d20" full_body_info = { "status": DEPOSIT_STATUS_LOAD_SUCCESS, "status_detail": status_detail, - "revision_id": revision_id, + "release_id": release_id, "directory_id": directory_id, "snapshot_id": snapshot_id, "origin_url": origin_url, } for url in private_check_url_endpoints(deposit_collection, deposit): expected_swhid = "swh:1:dir:%s" % directory_id expected_swhid_context = ( f"{expected_swhid}" f";origin={origin_url}" f";visit=swh:1:snp:{snapshot_id}" - f";anchor=swh:1:rev:{revision_id}" + f";anchor=swh:1:rel:{release_id}" f";path=/" ) response = authenticated_client.put( url, content_type="application/json", data=json.dumps(full_body_info), ) assert response.status_code == status.HTTP_204_NO_CONTENT deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == expected_status assert deposit.status_detail == status_detail assert deposit.swhid == expected_swhid assert deposit.swhid_context == expected_swhid_context # Reset deposit deposit = ready_deposit_verified deposit.save() def test_update_deposit_status_rejected_with_info( authenticated_client, deposit_collection, ready_deposit_verified ): """Update deposit with rejected status needs few information to succeed """ deposit = ready_deposit_verified for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.put( url, content_type="application/json", data=json.dumps({"status": DEPOSIT_STATUS_LOAD_FAILURE}), ) assert response.status_code == status.HTTP_204_NO_CONTENT deposit = 
def test_update_deposit_status_success_with_incomplete_data(
    authenticated_client, deposit_collection, ready_deposit_verified
):
    """A load success update missing any one of the mandatory keys must be
    refused with a 400 naming the missing key.

    """
    deposit = ready_deposit_verified
    new_status = DEPOSIT_STATUS_LOAD_SUCCESS

    complete_body = {
        "status": new_status,
        "release_id": "47dc6b4636c7f6cba0df83e3d5490bf4334d987e",
        "directory_id": "42a13fc721c8716ff695d0d62fc851d641f3a12b",
        "snapshot_id": "68c0d26104d47e278dd6be07ed61fafb561d0d20",
        "origin_url": "something",
    }

    for url in private_check_url_endpoints(deposit_collection, deposit):
        for missing in MANDATORY_KEYS:
            # Same body minus one mandatory key, so that the update is refused
            incomplete_body = {
                name: value for name, value in complete_body.items() if name != missing
            }

            response = authenticated_client.put(
                url, content_type="application/json", data=json.dumps(incomplete_body),
            )

            assert response.status_code == status.HTTP_400_BAD_REQUEST
            expected_message = (
                f"deposit status to {new_status} requires information {missing}"
            )
            assert expected_message in response.content.decode("utf-8")
def test_update_deposit_status_success_without_swhid_fail(
    authenticated_client, deposit_collection, ready_deposit_verified
):
    """A load success status sent without any identifier must be refused
    with a 400

    """
    deposit = ready_deposit_verified
    status_only_body = json.dumps({"status": DEPOSIT_STATUS_LOAD_SUCCESS})

    for url in private_check_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.put(
            url, content_type="application/json", data=status_only_body,
        )

        assert response.status_code == status.HTTP_400_BAD_REQUEST
        assert (
            b"Updating deposit status to done requires information" in response.content
        )
import APIClient import yaml from swh.auth.pytest_plugin import keycloak_mock_factory from swh.core.config import read from swh.core.pytest_plugin import get_response_cb from swh.deposit.auth import DEPOSIT_PERMISSION from swh.deposit.config import ( COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_VERIFIED, SE_IRI, setup_django_for, ) from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import ( create_arborescence_archive, post_archive, post_atom, ) from swh.model.hashutil import hash_to_bytes from swh.model.swhids import CoreSWHID, ObjectType, QualifiedSWHID from swh.scheduler import get_scheduler if TYPE_CHECKING: from swh.deposit.models import Deposit, DepositClient, DepositCollection # mypy is asked to ignore the import statement above because setup_databases # is not part of the d.t.utils.__all__ variable. USERNAME = "test" EMAIL = "test@example.org" COLLECTION = "test" TEST_USER = { "username": USERNAME, "password": "pass", "email": EMAIL, "provider_url": "https://hal-test.archives-ouvertes.fr/", "domain": "archives-ouvertes.fr/", "collection": {"name": COLLECTION}, } USER_INFO = { "name": USERNAME, "email": EMAIL, "email_verified": False, "family_name": "", "given_name": "", "groups": [], "preferred_username": USERNAME, "sub": "ffffffff-bbbb-4444-aaaa-14f61e6b7200", } USERNAME2 = "test2" EMAIL2 = "test@example.org" COLLECTION2 = "another-collection" TEST_USER2 = { "username": USERNAME2, "password": "", "email": EMAIL2, "provider_url": "https://hal-test.archives-ouvertes.example/", "domain": "archives-ouvertes.example/", "collection": {"name": COLLECTION2}, } KEYCLOAK_SERVER_URL = "https://auth.swh.org/SWHTest" KEYCLOAK_REALM_NAME = "SWHTest" CLIENT_ID = "swh-deposit" keycloak_mock_auth_success = keycloak_mock_factory( server_url=KEYCLOAK_SERVER_URL, realm_name=KEYCLOAK_REALM_NAME, client_id=CLIENT_ID, auth_success=True, 
@pytest.fixture()
def deposit_config_path(tmp_path, monkeypatch, deposit_config):
    """Write ``deposit_config`` as YAML and expose its location to the process.

    The configuration is dumped to ``deposit.yml`` under ``tmp_path`` and the
    resulting path is exported through the ``SWH_CONFIG_FILENAME`` environment
    variable.

    Returns:
        The path of the written configuration file.
    """
    config_file = os.path.join(tmp_path, "deposit.yml")
    with open(config_file, "w") as stream:
        serialized = yaml.dump(deposit_config)
        stream.write(serialized)

    monkeypatch.setenv("SWH_CONFIG_FILENAME", config_file)
    return config_file
"num_retries": 3, }, { "type": "load-deposit", "backend_name": "swh.loader.package.deposit.tasks.LoadDeposit", "description": "Loading deposit archive into swh archive", "num_retries": 3, }, ] for task_type in task_types: scheduler.create_task_type(task_type) @pytest.fixture(scope="session") def django_db_setup(request, django_db_blocker, postgresql_proc): from django.conf import settings settings.DATABASES["default"].update( { ("ENGINE", "django.db.backends.postgresql"), ("NAME", "tests"), ("USER", postgresql_proc.user), # noqa ("HOST", postgresql_proc.host), # noqa ("PORT", postgresql_proc.port), # noqa } ) with django_db_blocker.unblock(): setup_databases( verbosity=request.config.option.verbose, interactive=False, keepdb=False ) def execute_sql(sql): """Execute sql to postgres db""" with psycopg2.connect(database="postgres") as conn: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cur = conn.cursor() cur.execute(sql) @pytest.fixture(autouse=True, scope="session") def swh_proxy(): """Automatically inject this fixture in all tests to ensure no outside connection takes place. 
""" os.environ["http_proxy"] = "http://localhost:999" os.environ["https_proxy"] = "http://localhost:999" def create_deposit_collection(collection_name: str): """Create a deposit collection with name collection_name """ from swh.deposit.models import DepositCollection try: collection = DepositCollection._default_manager.get(name=collection_name) except DepositCollection.DoesNotExist: collection = DepositCollection(name=collection_name) collection.save() return collection def deposit_collection_factory(collection_name): @pytest.fixture def _deposit_collection(db, collection_name=collection_name): return create_deposit_collection(collection_name) return _deposit_collection deposit_collection = deposit_collection_factory(COLLECTION) deposit_another_collection = deposit_collection_factory(COLLECTION2) def _create_deposit_user( collection: "DepositCollection", user_data: Dict ) -> "DepositClient": """Create/Return the test_user "test" For basic authentication, this will save a password. This is not required for keycloak authentication scheme. 
""" from swh.deposit.models import DepositClient user_data_d = deepcopy(user_data) user_data_d.pop("collection", None) passwd = user_data_d.pop("password", None) user, _ = DepositClient.objects.get_or_create( # type: ignore username=user_data_d["username"], defaults={**user_data_d, "collections": [collection.id]}, ) if passwd: user.set_password(passwd) user.save() return user @pytest.fixture def deposit_user(db, deposit_collection): return _create_deposit_user(deposit_collection, TEST_USER) @pytest.fixture def deposit_another_user(db, deposit_another_collection): return _create_deposit_user(deposit_another_collection, TEST_USER2) @pytest.fixture def anonymous_client(): """Create an anonymous client (no credentials during queries to the deposit) """ return APIClient() # <- drf's client def mock_keycloakopenidconnect(mocker, keycloak_mock): """Mock swh.deposit.auth.KeycloakOpenIDConnect to return the keycloak_mock """ mock = mocker.patch("swh.deposit.auth.KeycloakOpenIDConnect") mock.from_configfile.return_value = keycloak_mock return mock @pytest.fixture def mock_keycloakopenidconnect_ok(mocker, keycloak_mock_auth_success): """Mock keycloak so it always accepts connection for user with the right permissions """ return mock_keycloakopenidconnect(mocker, keycloak_mock_auth_success) @pytest.fixture def mock_keycloakopenidconnect_ko(mocker, keycloak_mock_auth_failure): """Mock keycloak so it always refuses connections.""" return mock_keycloakopenidconnect(mocker, keycloak_mock_auth_failure) def _create_authenticated_client(client, user, password=None): """Return a client whose credentials will be proposed to the deposit server. This also patched the client instance to keep a reference on the associated deposit_user. 
""" if not password: password = "irrelevant-if-not-set" _token = "%s:%s" % (user.username, password) token = base64.b64encode(_token.encode("utf-8")) authorization = "Basic %s" % token.decode("utf-8") client.credentials(HTTP_AUTHORIZATION=authorization) client.deposit_client = user yield client client.logout() @pytest.fixture def basic_authenticated_client(anonymous_client, deposit_user): yield from _create_authenticated_client( anonymous_client, deposit_user, password=TEST_USER["password"] ) @pytest.fixture def authenticated_client(mock_keycloakopenidconnect_ok, anonymous_client, deposit_user): yield from _create_authenticated_client(anonymous_client, deposit_user) @pytest.fixture def unauthorized_client(mock_keycloakopenidconnect_ko, anonymous_client, deposit_user): """Create an unauthorized client (will see their authentication fail) """ yield from _create_authenticated_client(anonymous_client, deposit_user) @pytest.fixture def insufficient_perm_client( mocker, keycloak_mock_auth_success, anonymous_client, deposit_user ): """keycloak accepts connection but client returned has no deposit permission, so access is not allowed. """ keycloak_mock_auth_success.client_permissions = [] mock_keycloakopenidconnect(mocker, keycloak_mock_auth_success) yield from _create_authenticated_client(anonymous_client, deposit_user) @pytest.fixture def sample_archive(tmp_path): """Returns a sample archive """ tmp_path = str(tmp_path) # pytest version limitation in previous version archive = create_arborescence_archive( tmp_path, "archive1", "file1", b"some content in file" ) return archive @pytest.fixture def atom_dataset(datadir) -> Mapping[str, str]: """Compute the paths to atom files. 
def create_deposit(
    client,
    collection_name: str,
    sample_archive,
    external_id: str,
    deposit_status=DEPOSIT_STATUS_DEPOSITED,
    in_progress=False,
):
    """Post ``sample_archive`` to a collection and return the resulting deposit.

    The archive is posted to the collection IRI with ``external_id`` as slug.
    The created deposit is then read back from the database and, when its
    status differs from ``deposit_status``, updated and saved with that status.

    Returns:
        The ``Deposit`` instance, whose status equals ``deposit_status``.

    """
    collection_url = reverse(COL_IRI, args=[collection_name])
    headers = {
        "HTTP_SLUG": external_id,
        "HTTP_IN_PROGRESS": str(in_progress).lower(),
    }
    response = post_archive(client, collection_url, sample_archive, **headers)

    # The creation must have succeeded; the response body is shown otherwise.
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    from swh.deposit.models import Deposit

    parsed = parse_xml(BytesIO(response.content))
    deposit = Deposit._default_manager.get(id=parsed["swh:deposit_id"])

    already_in_status = deposit.status == deposit_status
    if not already_in_status:
        deposit.status = deposit_status
        deposit.save()
    assert deposit.status == deposit_status
    return deposit
""" deposit = create_deposit( authenticated_client, collection_name, deposit_status=DEPOSIT_STATUS_PARTIAL, **kwargs, ) origin_url = deposit.client.provider_url + deposit.external_id response = post_atom( authenticated_client, reverse(SE_IRI, args=[collection_name, deposit.id]), data=atom_dataset["entry-data0"] % origin_url, HTTP_IN_PROGRESS="true", ) assert response.status_code == status.HTTP_201_CREATED assert deposit.status == DEPOSIT_STATUS_PARTIAL from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit.id) assert deposit.status == deposit_status return deposit def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED, in_progress=False): """Build deposit with a specific status """ @pytest.fixture() def _deposit( sample_archive, deposit_collection, authenticated_client, deposit_status=deposit_status, ): external_id = "external-id-%s" % deposit_status return create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id=external_id, deposit_status=deposit_status, in_progress=in_progress, ) return _deposit deposited_deposit = deposit_factory() rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED) partial_deposit = deposit_factory( deposit_status=DEPOSIT_STATUS_PARTIAL, in_progress=True ) verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED) completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS) failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE) @pytest.fixture def partial_deposit_with_metadata( sample_archive, deposit_collection, authenticated_client, atom_dataset ): """Returns deposit with archive and metadata provided, status 'partial' """ return create_binary_deposit( authenticated_client, deposit_collection.name, sample_archive=sample_archive, external_id="external-id-partial", in_progress=True, deposit_status=DEPOSIT_STATUS_PARTIAL, atom_dataset=atom_dataset, ) @pytest.fixture def 
@pytest.fixture
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
    """Return a deposit in load-success status with its SWHIDs filled in.

    ``swhid`` is the directory SWHID. ``swhid_context`` is the same directory
    qualified with the origin, the snapshot as visit, the release as anchor and
    the root path.

    """
    deposit = create_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-complete",
        deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    origin = "https://hal.archives-ouvertes.fr/hal-01727745"
    directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
    release_id = hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10")
    snapshot_id = hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0")

    deposit.swhid = f"swh:1:dir:{directory_id}"

    visit = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id)
    anchor = CoreSWHID(object_type=ObjectType.RELEASE, object_id=release_id)
    qualified = QualifiedSWHID(
        object_type=ObjectType.DIRECTORY,
        object_id=hash_to_bytes(directory_id),
        origin=origin,
        visit=visit,
        anchor=anchor,
        path=b"/",
    )
    deposit.swhid_context = str(qualified)

    deposit.save()
    return deposit
class SWHDepositTestClient(PrivateApiDepositClient):
    """Deposit test client to permit overriding the default request client.

    Every call goes through the ``client`` given at construction time instead
    of the request machinery of the parent class.

    """

    def __init__(self, client, config):
        super().__init__(config=config)
        self.client = client

    def archive_get(self, archive_update_url, archive_path, log=None):
        """Stream the archive served at ``archive_update_url`` into ``archive_path``.

        Returns:
            ``archive_path``, once every chunk has been written.

        """
        r = self.client.get(archive_update_url)
        with open(archive_path, "wb") as f:
            for chunk in r.streaming_content:
                f.write(chunk)

        return archive_path

    def metadata_get(self, metadata_url, log=None):
        """Return the JSON-decoded body served at ``metadata_url``."""
        r = self.client.get(metadata_url)
        return json.loads(r.content.decode("utf-8"))

    def status_update(
        self,
        update_status_url,
        status,
        release_id=None,
        directory_id=None,
        origin_url=None,
        snapshot_id=None,
        status_detail=None,
    ):
        """PUT a status update, sending only the optional values that are set.

        ``snapshot_id`` and ``status_detail`` are optional trailing keywords:
        the update endpoint lists ``snapshot_id`` among its mandatory keys for
        a load success, so a payload without it can only be rejected.

        Args:
            update_status_url: url of the status update endpoint
            status: new status of the deposit
            release_id: optional release identifier
            directory_id: optional directory identifier
            origin_url: optional origin url
            snapshot_id: optional snapshot identifier
            status_detail: optional detail about the status

        """
        payload = {"status": status}
        optional_values = {
            "release_id": release_id,
            "directory_id": directory_id,
            "origin_url": origin_url,
            "snapshot_id": snapshot_id,
            "status_detail": status_detail,
        }
        # Falsy values (None, empty string) are left out of the payload.
        payload.update({key: value for key, value in optional_values.items() if value})

        self.client.put(
            update_status_url, content_type="application/json", data=json.dumps(payload)
        )

    def check(self, check_url):
        """Return the ``status`` value of the JSON body served at ``check_url``."""
        r = self.client.get(check_url)
        data = json.loads(r.content.decode("utf-8"))
        return data["status"]
""" storage.refresh_stat_counters() stats = storage.stat_counters() keys = [ "content", "directory", "origin", "origin_visit", "person", "release", "revision", "skipped_content", "snapshot", ] return {k: stats.get(k) for k in keys} def decode_target(branch: Optional[SnapshotBranch]) -> Optional[Dict]: """Test helper to ease readability in test """ if not branch: return None target_type = branch.target_type if target_type == TargetType.ALIAS: decoded_target = branch.target.decode("utf-8") else: decoded_target = hash_to_hex(branch.target) return {"target": decoded_target, "target_type": target_type} def check_snapshot(expected_snapshot, storage): """Check for snapshot match. Provide the hashes as hexadecimal, the conversion is done within the method. Args: expected_snapshot (dict): full snapshot with hex ids storage (Storage): expected storage """ expected_snapshot_id = expected_snapshot["id"] expected_branches = expected_snapshot["branches"] snap = snapshot_get_all_branches(hash_to_bytes(expected_snapshot_id)) if snap is None: # display known snapshots instead if possible if hasattr(storage, "_snapshots"): # in-mem storage from pprint import pprint for snap_id, (_snap, _) in storage._snapshots.items(): snapd = _snap.to_dict() snapd["id"] = hash_to_hex(snapd["id"]) branches = { branch.decode("utf-8"): decode_target(target) for branch, target in snapd["branches"].items() } snapd["branches"] = branches pprint(snapd) raise AssertionError("Snapshot is not found") branches = { branch.decode("utf-8"): decode_target(branch) for branch_name, branch in snap["branches"].items() } assert expected_branches == branches diff --git a/swh/deposit/tests/loader/test_client.py b/swh/deposit/tests/loader/test_client.py index 8434a0d0..7745a009 100644 --- a/swh/deposit/tests/loader/test_client.py +++ b/swh/deposit/tests/loader/test_client.py @@ -1,246 +1,246 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this 
def build_expected_path(datadir, base_url: str, api_url: str) -> str:
    """Compute the path, below ``datadir``, of the file served for ``api_url``.

    The directory is named ``<scheme>_<hostname>`` after ``base_url``. The file
    name is ``api_url`` stripped of one trailing and one leading slash, with
    the remaining slashes turned into underscores.

    """
    parsed = urlparse(base_url)
    host_dir = "%s_%s" % (parsed.scheme, parsed.hostname)

    # At most one slash is removed on each side, trailing one first.
    trimmed = api_url[:-1] if api_url.endswith("/") else api_url
    trimmed = trimmed[1:] if trimmed.startswith("/") else trimmed

    file_name = "_".join(trimmed.split("/"))
    return os.path.join(datadir, host_dir, file_name)
datadir, "http://example.org", "/hello.json", convert_fn=json.loads ) assert actual_content2 == {"a": [1, 3]} # private api to retrieve archive def test_archive_get(tmp_path, datadir, requests_mock_datadir): """Retrieving archive data through private api should stream data """ api_url = "/1/private/test/1/raw/" client = PrivateApiDepositClient(CLIENT_TEST_CONFIG) expected_content = read_served_path(datadir, client.base_url, api_url) archive_path = os.path.join(tmp_path, "test.archive") archive_path = client.archive_get(api_url, archive_path) assert os.path.exists(archive_path) is True with open(archive_path, "rb") as f: actual_content = f.read() assert actual_content == expected_content assert client.base_url == CLIENT_TEST_CONFIG["url"] assert client.auth is None def test_archive_get_auth(tmp_path, datadir, requests_mock_datadir): """Retrieving archive data through private api should stream data """ api_url = "/1/private/test/1/raw/" config = CLIENT_TEST_CONFIG.copy() config["auth"] = { # add authentication setup "username": "user", "password": "pass", } client = PrivateApiDepositClient(config) expected_content = read_served_path(datadir, client.base_url, api_url) archive_path = os.path.join(tmp_path, "test.archive") archive_path = client.archive_get(api_url, archive_path) assert os.path.exists(archive_path) is True with open(archive_path, "rb") as f: actual_content = f.read() assert actual_content == expected_content assert client.base_url == CLIENT_TEST_CONFIG["url"] assert client.auth == ("user", "pass") def test_archive_get_ko(tmp_path, datadir, requests_mock_datadir): """Reading archive can fail for some reasons """ unknown_api_url = "/1/private/unknown/deposit-id/raw/" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) with pytest.raises(ValueError, match="Problem when retrieving deposit"): client.archive_get(unknown_api_url, "some/path") # private api read metadata def test_metadata_get(datadir, requests_mock_datadir): """Reading archive should 
def test_status_update_with_no_release_id(mocker):
    """Updating the status without a release id only sends the status.

    """
    request_mock = mocker.patch.object(Session, "request")

    client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG)
    client.status_update("/update/status/fail", DEPOSIT_STATUS_LOAD_FAILURE)

    expected_payload = {"status": DEPOSIT_STATUS_LOAD_FAILURE}
    request_mock.assert_called_once_with(
        "put", "https://nowhere.org/update/status/fail", json=expected_payload,
    )