Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/client.py
Show All 9 Lines | |||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Any, Dict, Optional, Tuple | from typing import Any, Dict, Optional, Tuple | ||||
from urllib.parse import urljoin | from urllib.parse import urljoin | ||||
import warnings | import warnings | ||||
import requests | import requests | ||||
from requests import Response | |||||
from swh.core.config import load_from_envvar | from swh.core.config import load_from_envvar | ||||
from swh.deposit import __version__ as swh_deposit_version | from swh.deposit import __version__ as swh_deposit_version | ||||
from swh.deposit.utils import parse_xml | from swh.deposit.utils import parse_xml | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | class PrivateApiDepositClient(BaseApiDepositClient): | ||||
"""Private API deposit client to: | """Private API deposit client to: | ||||
- read a given deposit's archive(s) | - read a given deposit's archive(s) | ||||
- read a given deposit's metadata | - read a given deposit's metadata | ||||
- update a given deposit's status | - update a given deposit's status | ||||
""" | """ | ||||
def archive_get(self, archive_update_url: str, archive: str) -> Optional[str]:
    """Retrieve the archive from the deposit to a local directory.

    Args:
        archive_update_url: The full deposit archive(s)'s raw content url
            to retrieve locally
        archive: the local archive's path where to store the raw content

    Returns:
        The path to the local archive to load.

    Raises:
        ValueError: if the response to the retrieval query is not ok.

    """
    response = self.do("get", archive_update_url, stream=True)
    if not response.ok:
        msg = "Problem when retrieving deposit archive at %s" % (archive_update_url,)
        logger.error(msg)
        raise ValueError(msg)

    # Without an explicit chunk_size, iter_content() yields the body one byte
    # at a time, which means one write() call per byte of the archive.
    chunk_size = 64 * 1024
    with open(archive, "wb") as f:
        for chunk in response.iter_content(chunk_size=chunk_size):
            f.write(chunk)
    return archive
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | class BaseDepositClient(BaseApiDepositClient): | ||||
def compute_url(self, *args, **kwargs):
    """Return the api endpoint url to query.

    Not implemented here: calling it raises NotImplementedError.

    """
    raise NotImplementedError()
def compute_method(self, *args, **kwargs):
    """Return the http method to use when querying the url.

    Not implemented here: calling it raises NotImplementedError.

    """
    raise NotImplementedError()
def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
    """Parse the xml body of a successful api response into a dict.

    Not implemented here: calling it raises NotImplementedError.

    """
    raise NotImplementedError()
def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
    """Build extra information out of the inputs (e.g. http headers, ...).

    This implementation ignores its inputs and returns an empty dict.

    """
    extra_information: Dict[str, Any] = {}
    return extra_information
def parse_result_error(self, xml_content: str) -> Dict[str, Any]:
    """Parse an xml error response into a dict.

    Returns:
        dict with the following keys:

            'summary': content of the error's 'atom:summary' entry
            'detail': content of the error's 'detail' entry, '' if missing
            'sword:verboseDescription': content of the entry of the same
                name, '' if missing

    """
    sword_error = parse_xml(xml_content)["sword:error"]
    verbose_key = "sword:verboseDescription"
    return {
        "summary": sword_error["atom:summary"],
        "detail": sword_error.get("detail", ""),
        verbose_key: sword_error.get(verbose_key, ""),
    }
def do_execute(self, method: str, url: str, info: Dict) -> Response:
    """Run the http query on url with the given http method.

    This default implementation delegates to self.do(method, url) and does
    not use info. Subclasses override it when the query needs more than
    that.

    """
    response = self.do(method, url)
    return response
def execute(self, *args, **kwargs) -> Dict[str, Any]:
    """Main endpoint to prepare and execute the http query to the api.

    The url, http method and extra information are computed out of the
    inputs through compute_url, compute_method and compute_information,
    then the query is delegated to do_execute.

    Raises:
        MaintenanceError if some api maintenance is happening.

    Returns:
        Dict of computed api data:

        - when the query itself raised: a copy of empty_result with an
          'error' key holding the formatted error message
        - on a 204 response: {'status': 204}
        - on any other ok response: the output of parse_result_ok
        - otherwise: the output of parse_result_error, updated with
          empty_result and a 'status' key holding the http status code

    """
    url = self.compute_url(*args, **kwargs)
    method = self.compute_method(*args, **kwargs)
    info = self.compute_information(*args, **kwargs)

    try:
        response = self.do_execute(method, url, info)
    except Exception as e:
        # Work on a copy: empty_result is shared by every call made through
        # this client, so updating it in place would leak this 'error' key
        # into all the following results.
        result = dict(self.empty_result)
        result["error"] = self.error_msg % (url, e)
        return result

    if response.ok:
        if int(response.status_code) == 204:  # 204 returns no body
            return {"status": response.status_code}
        return self.parse_result_ok(response.text)

    error = self.parse_result_error(response.text)
    error.update(self.empty_result)
    if response.status_code == 503:
        summary = error.get("summary")
        detail = error.get("sword:verboseDescription")
        # Maintenance error
        if summary and detail:
            raise MaintenanceError(f"{summary}: {detail}")
    error["status"] = response.status_code
    return error
class ServiceDocumentDepositClient(BaseDepositClient):
    """Client retrieving the service document information."""

    def __init__(self, config=None, url=None, auth=None):
        """Set up the client with the service document error message and
        empty result.

        """
        super().__init__(
            config=config,
            url=url,
            auth=auth,
            empty_result={"collection": None},
            error_msg="Service document failure at %s: %s",
        )

    def compute_url(self, *args, **kwargs):
        """Return the service document endpoint, whatever the inputs."""
        endpoint = "/servicedocument/"
        return endpoint

    def compute_method(self, *args, **kwargs):
        """Return the http method used to read the service document."""
        http_method = "get"
        return http_method

    def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
        """Parse the service document's success response with parse_xml."""
        parsed = parse_xml(xml_content)
        return parsed

    def parse_result_error(self, xml_content: str) -> Dict[str, Any]:
        """Parse the error response, keeping only its summary under the
        'error' key.

        """
        summary = super().parse_result_error(xml_content)["summary"]
        return {"error": summary}
class StatusDepositClient(BaseDepositClient): | class StatusDepositClient(BaseDepositClient): | ||||
"""Status information on a deposit. | """Status information on a deposit. | ||||
""" | """ | ||||
Show All 12 Lines | def __init__(self, config=None, url=None, auth=None): | ||||
) | ) | ||||
def compute_url(self, collection, deposit_id):
    """Return the status endpoint of deposit deposit_id within collection."""
    url_parts = (collection, deposit_id)
    return "/%s/%s/status/" % url_parts
def compute_method(self, *args, **kwargs):
    """Return the http method used for this query, whatever the inputs."""
    http_method = "get"
    return http_method
def parse_result_ok(self, xml_content): | def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: | ||||
"""Given an xml content as string, returns a deposit dict. | """Given an xml content as string, returns a deposit dict. | ||||
""" | """ | ||||
data = parse_xml(xml_content) | data = parse_xml(xml_content) | ||||
keys = [ | keys = [ | ||||
"deposit_id", | "deposit_id", | ||||
"deposit_status", | "deposit_status", | ||||
"deposit_status_detail", | "deposit_status_detail", | ||||
Show All 19 Lines | def __init__(self, config=None, url=None, auth=None): | ||||
) | ) | ||||
def compute_url(self, collection, *args, **kwargs):
    """Return the endpoint of the collection; other inputs are ignored."""
    url_template = "/%s/"
    return url_template % collection
def compute_method(self, *args, **kwargs):
    """Return the http method used for this query, whatever the inputs."""
    http_method = "post"
    return http_method
def parse_result_ok(self, xml_content): | def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: | ||||
"""Given an xml content as string, returns a deposit dict. | """Given an xml content as string, returns a deposit dict. | ||||
""" | """ | ||||
data = parse_xml(xml_content) | data = parse_xml(xml_content) | ||||
keys = [ | keys = [ | ||||
"deposit_id", | "deposit_id", | ||||
"deposit_status", | "deposit_status", | ||||
"deposit_status_detail", | "deposit_status_detail", | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | class CreateMetadataOnlyDepositClient(BaseCreateDepositClient): | ||||
"""Create metadata-only deposit.""" | """Create metadata-only deposit.""" | ||||
def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
    """Build the headers and file path of the metadata-only deposit query.

    Reads the required 'metadata_path' keyword argument (KeyError if it is
    missing); positional arguments are ignored.

    """
    headers = {"CONTENT-TYPE": "application/atom+xml;type=entry"}
    information: Dict[str, Any] = {"headers": headers}
    information["filepath"] = kwargs["metadata_path"]
    return information
def parse_result_ok(self, xml_content): | def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: | ||||
"""Given an xml content as string, returns a deposit dict. | """Given an xml content as string, returns a deposit dict. | ||||
""" | """ | ||||
data = parse_xml(xml_content) | data = parse_xml(xml_content) | ||||
keys = [ | keys = [ | ||||
"deposit_id", | "deposit_id", | ||||
"deposit_status", | "deposit_status", | ||||
"deposit_date", | "deposit_date", | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | def deposit_update( | ||||
slug: Optional[str], | slug: Optional[str], | ||||
archive: Optional[str] = None, | archive: Optional[str] = None, | ||||
metadata: Optional[str] = None, | metadata: Optional[str] = None, | ||||
in_progress: bool = False, | in_progress: bool = False, | ||||
replace: bool = False, | replace: bool = False, | ||||
swhid: Optional[str] = None, | swhid: Optional[str] = None, | ||||
): | ): | ||||
"""Update (add/replace) existing deposit (archive, metadata, both).""" | """Update (add/replace) existing deposit (archive, metadata, both).""" | ||||
r = self.deposit_status(collection, deposit_id) | response = self.deposit_status(collection, deposit_id) | ||||
if "error" in r: | if "error" in response: | ||||
return r | return response | ||||
status = r["deposit_status"] | status = response["deposit_status"] | ||||
if swhid is None and status != "partial": | if swhid is None and status != "partial": | ||||
return { | return { | ||||
"error": "You can only act on deposit with status 'partial'", | "error": "You can only act on deposit with status 'partial'", | ||||
"detail": f"The deposit {deposit_id} has status '{status}'", | "detail": f"The deposit {deposit_id} has status '{status}'", | ||||
"deposit_status": status, | "deposit_status": status, | ||||
"deposit_id": deposit_id, | "deposit_id": deposit_id, | ||||
} | } | ||||
if swhid is not None and status != "done": | if swhid is not None and status != "done": | ||||
return { | return { | ||||
"error": "You can only update metadata on deposit with status 'done'", | "error": "You can only update metadata on deposit with status 'done'", | ||||
"detail": f"The deposit {deposit_id} has status '{status}'", | "detail": f"The deposit {deposit_id} has status '{status}'", | ||||
"deposit_status": status, | "deposit_status": status, | ||||
"deposit_id": deposit_id, | "deposit_id": deposit_id, | ||||
} | } | ||||
if archive and not metadata: | if archive and not metadata: | ||||
r = UpdateArchiveDepositClient(url=self.base_url, auth=self.auth).execute( | result = UpdateArchiveDepositClient( | ||||
url=self.base_url, auth=self.auth | |||||
).execute( | |||||
collection, | collection, | ||||
in_progress, | in_progress, | ||||
slug, | slug, | ||||
deposit_id=deposit_id, | deposit_id=deposit_id, | ||||
archive_path=archive, | archive_path=archive, | ||||
replace=replace, | replace=replace, | ||||
) | ) | ||||
elif not archive and metadata and swhid is None: | elif not archive and metadata and swhid is None: | ||||
r = UpdateMetadataOnPartialDepositClient( | result = UpdateMetadataOnPartialDepositClient( | ||||
url=self.base_url, auth=self.auth | url=self.base_url, auth=self.auth | ||||
).execute( | ).execute( | ||||
collection, | collection, | ||||
in_progress, | in_progress, | ||||
slug, | slug, | ||||
deposit_id=deposit_id, | deposit_id=deposit_id, | ||||
metadata_path=metadata, | metadata_path=metadata, | ||||
replace=replace, | replace=replace, | ||||
) | ) | ||||
elif not archive and metadata and swhid is not None: | elif not archive and metadata and swhid is not None: | ||||
r = UpdateMetadataOnDoneDepositClient( | result = UpdateMetadataOnDoneDepositClient( | ||||
url=self.base_url, auth=self.auth | url=self.base_url, auth=self.auth | ||||
).execute( | ).execute( | ||||
collection, | collection, | ||||
in_progress, | in_progress, | ||||
slug, | slug, | ||||
deposit_id=deposit_id, | deposit_id=deposit_id, | ||||
metadata_path=metadata, | metadata_path=metadata, | ||||
swhid=swhid, | swhid=swhid, | ||||
) | ) | ||||
else: | else: | ||||
r = UpdateMultipartDepositClient(url=self.base_url, auth=self.auth).execute( | result = UpdateMultipartDepositClient( | ||||
url=self.base_url, auth=self.auth | |||||
).execute( | |||||
collection, | collection, | ||||
in_progress, | in_progress, | ||||
slug, | slug, | ||||
deposit_id=deposit_id, | deposit_id=deposit_id, | ||||
archive_path=archive, | archive_path=archive, | ||||
metadata_path=metadata, | metadata_path=metadata, | ||||
replace=replace, | replace=replace, | ||||
) | ) | ||||
if "error" in r: | if "error" in result: | ||||
return r | return result | ||||
return self.deposit_status(collection, deposit_id) | return self.deposit_status(collection, deposit_id) | ||||
def deposit_metadata_only(
    self, collection: str, metadata: Optional[str] = None,
):
    """Create a metadata-only deposit in collection.

    Args:
        collection: name of the collection to deposit into
        metadata: path to the metadata file to send; required despite its
            None default

    Raises:
        ValueError: if metadata is None.

    Returns:
        The result of the CreateMetadataOnlyDepositClient query.

    """
    # An explicit check instead of an assert: asserts are stripped when
    # python runs with -O, which would let a None path reach the query.
    if metadata is None:
        raise ValueError("metadata is required to create a metadata-only deposit")
    client = CreateMetadataOnlyDepositClient(url=self.base_url, auth=self.auth)
    return client.execute(collection, metadata_path=metadata)