Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/client/__init__.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Module in charge of defining an swh-deposit client | """Module in charge of defining an swh-deposit client | ||||
""" | """ | ||||
import hashlib | import hashlib | ||||
import os | import os | ||||
import requests | import requests | ||||
import xmltodict | import xmltodict | ||||
import logging | |||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
from swh.core.config import SWHConfig | from swh.core.config import SWHConfig | ||||
logger = logging.getLogger(__name__) | |||||
def _parse(stream, encoding='utf-8'): | def _parse(stream, encoding='utf-8'): | ||||
"""Given a xml stream, parse the result. | """Given a xml stream, parse the result. | ||||
Args: | Args: | ||||
stream (bytes/text): The stream to parse | stream (bytes/text): The stream to parse | ||||
encoding (str): The encoding to use if to decode the bytes | encoding (str): The encoding to use if to decode the bytes | ||||
stream | stream | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | |||||
class PrivateApiDepositClient(BaseApiDepositClient): | class PrivateApiDepositClient(BaseApiDepositClient): | ||||
"""Private API deposit client to: | """Private API deposit client to: | ||||
- read a given deposit's archive(s) | - read a given deposit's archive(s) | ||||
- read a given deposit's metadata | - read a given deposit's metadata | ||||
- update a given deposit's status | - update a given deposit's status | ||||
""" | """ | ||||
def archive_get(self, archive_update_url, archive_path, log=None): | def archive_get(self, archive_update_url, archive): | ||||
"""Retrieve the archive from the deposit to a local directory. | """Retrieve the archive from the deposit to a local directory. | ||||
Args: | Args: | ||||
archive_update_url (str): The full deposit archive(s)'s raw content | archive_update_url (str): The full deposit archive(s)'s raw content | ||||
to retrieve locally | to retrieve locally | ||||
archive_path (str): the local archive's path where to store | archive (str): the local archive's path where to store | ||||
the raw content | the raw content | ||||
Returns: | Returns: | ||||
The archive path to the local archive to load. | The archive path to the local archive to load. | ||||
Or None if any problem arose. | Or None if any problem arose. | ||||
""" | """ | ||||
r = self.do('get', archive_update_url, stream=True) | r = self.do('get', archive_update_url, stream=True) | ||||
if r.ok: | if r.ok: | ||||
with open(archive_path, 'wb') as f: | with open(archive, 'wb') as f: | ||||
for chunk in r.iter_content(): | for chunk in r.iter_content(): | ||||
f.write(chunk) | f.write(chunk) | ||||
return archive_path | return archive | ||||
msg = 'Problem when retrieving deposit archive at %s' % ( | msg = 'Problem when retrieving deposit archive at %s' % ( | ||||
archive_update_url, ) | archive_update_url, ) | ||||
if log: | logger.error(msg) | ||||
log.error(msg) | |||||
raise ValueError(msg) | raise ValueError(msg) | ||||
def metadata_get(self, metadata_url, log=None): | def metadata_get(self, metadata_url): | ||||
"""Retrieve the metadata information on a given deposit. | """Retrieve the metadata information on a given deposit. | ||||
Args: | Args: | ||||
metadata_url (str): The full deposit metadata url to retrieve | metadata_url (str): The full deposit metadata url to retrieve | ||||
locally | locally | ||||
Returns: | Returns: | ||||
The dictionary of metadata for that deposit or None if any | The dictionary of metadata for that deposit or None if any | ||||
problem arose. | problem arose. | ||||
""" | """ | ||||
r = self.do('get', metadata_url) | r = self.do('get', metadata_url) | ||||
if r.ok: | if r.ok: | ||||
return r.json() | return r.json() | ||||
msg = 'Problem when retrieving metadata at %s' % metadata_url | msg = 'Problem when retrieving metadata at %s' % metadata_url | ||||
if log: | logger.error(msg) | ||||
log.error(msg) | |||||
raise ValueError(msg) | raise ValueError(msg) | ||||
def status_update(self, update_status_url, status, | def status_update(self, update_status_url, status, | ||||
revision_id=None, directory_id=None, origin_url=None): | revision_id=None, directory_id=None, origin_url=None): | ||||
"""Update the deposit's status. | """Update the deposit's status. | ||||
Args: | Args: | ||||
Show All 9 Lines | def status_update(self, update_status_url, status, | ||||
payload['revision_id'] = revision_id | payload['revision_id'] = revision_id | ||||
if directory_id: | if directory_id: | ||||
payload['directory_id'] = directory_id | payload['directory_id'] = directory_id | ||||
if origin_url: | if origin_url: | ||||
payload['origin_url'] = origin_url | payload['origin_url'] = origin_url | ||||
self.do('put', update_status_url, json=payload) | self.do('put', update_status_url, json=payload) | ||||
def check(self, check_url, log=None): | def check(self, check_url): | ||||
"""Check the deposit's associated data (metadata, archive(s)) | """Check the deposit's associated data (metadata, archive(s)) | ||||
Args: | Args: | ||||
check_url (str): the full deposit's check url | check_url (str): the full deposit's check url | ||||
""" | """ | ||||
r = self.do('get', check_url) | r = self.do('get', check_url) | ||||
if r.ok: | if r.ok: | ||||
data = r.json() | data = r.json() | ||||
return data['status'] | return data['status'] | ||||
msg = 'Problem when checking deposit %s' % check_url | msg = 'Problem when checking deposit %s' % check_url | ||||
if log: | logger.error(msg) | ||||
log.error(msg) | |||||
raise ValueError(msg) | raise ValueError(msg) | ||||
class BaseDepositClient(BaseApiDepositClient, metaclass=ABCMeta): | class BaseDepositClient(BaseApiDepositClient, metaclass=ABCMeta): | ||||
"""Base Deposit client to access the public api. | """Base Deposit client to access the public api. | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | class ServiceDocumentDepositClient(BaseDepositClient): | ||||
def compute_method(self, *args, **kwargs): | def compute_method(self, *args, **kwargs): | ||||
return 'get' | return 'get' | ||||
def parse_result_ok(self, xml_content): | def parse_result_ok(self, xml_content): | ||||
"""Parse service document's success response. | """Parse service document's success response. | ||||
""" | """ | ||||
return _parse_with_filter(xml_content, keys=['collection']) | return _parse(xml_content) | ||||
class StatusDepositClient(BaseDepositClient): | class StatusDepositClient(BaseDepositClient): | ||||
"""Status information on a deposit. | """Status information on a deposit. | ||||
""" | """ | ||||
def __init__(self, config): | def __init__(self, config): | ||||
super().__init__(config, | super().__init__(config, | ||||
▲ Show 20 Lines • Show All 157 Lines • ▼ Show 20 Lines | def _multipart_info(self, info, info_meta): | ||||
headers = { | headers = { | ||||
'SLUG': info['slug'], | 'SLUG': info['slug'], | ||||
'CONTENT_MD5': info['md5sum'], | 'CONTENT_MD5': info['md5sum'], | ||||
'IN-PROGRESS': str(info['in_progress']), | 'IN-PROGRESS': str(info['in_progress']), | ||||
} | } | ||||
return files, headers | return files, headers | ||||
def compute_information(self, collection, archive_path, metadata_path, | def compute_information(self, collection, archive, metadata, | ||||
in_progress, slug, **kwargs): | in_progress, slug, **kwargs): | ||||
info = self._compute_information( | info = self._compute_information( | ||||
collection, archive_path, in_progress, slug) | collection, archive, in_progress, slug) | ||||
info_meta = self._compute_information( | info_meta = self._compute_information( | ||||
collection, metadata_path, in_progress, slug, is_archive=False) | collection, metadata, in_progress, slug, is_archive=False) | ||||
files, headers = self._multipart_info(info, info_meta) | files, headers = self._multipart_info(info, info_meta) | ||||
return {'files': files, 'headers': headers} | return {'files': files, 'headers': headers} | ||||
def do_execute(self, method, url, info): | def do_execute(self, method, url, info): | ||||
return self.do( | return self.do( | ||||
method, url, files=info['files'], headers=info['headers']) | method, url, files=info['files'], headers=info['headers']) | ||||
class UpdateMultipartDepositClient(CreateMultipartDepositClient): | class UpdateMultipartDepositClient(CreateMultipartDepositClient): | ||||
"""Update a multipart deposit client.""" | """Update a multipart deposit client.""" | ||||
def compute_url(self, collection, *args, deposit_id=None, **kwargs): | def compute_url(self, collection, *args, deposit_id=None, **kwargs): | ||||
return '/%s/%s/metadata/' % (collection, deposit_id) | return '/%s/%s/metadata/' % (collection, deposit_id) | ||||
def compute_method(self, *args, replace=False, **kwargs): | def compute_method(self, *args, replace=False, **kwargs): | ||||
return 'put' if replace else 'post' | return 'put' if replace else 'post' | ||||
class PublicApiDepositClient(BaseApiDepositClient): | class PublicApiDepositClient(BaseApiDepositClient): | ||||
"""Public api deposit client.""" | """Public api deposit client.""" | ||||
def service_document(self, log=None): | def service_document(self): | ||||
"""Retrieve service document endpoint's information.""" | """Retrieve service document endpoint's information.""" | ||||
return ServiceDocumentDepositClient(self.config).execute() | return ServiceDocumentDepositClient(self.config).execute() | ||||
def deposit_status(self, collection, deposit_id, log=None): | def deposit_status(self, collection, deposit_id): | ||||
"""Retrieve status information on a deposit.""" | """Retrieve status information on a deposit.""" | ||||
return StatusDepositClient(self.config).execute( | return StatusDepositClient(self.config).execute( | ||||
collection, deposit_id) | collection, deposit_id) | ||||
def deposit_create(self, collection, slug, archive_path=None, | def deposit_create(self, collection, slug, archive=None, | ||||
metadata_path=None, in_progress=False, log=None): | metadata=None, in_progress=False): | ||||
"""Create a new deposit (archive, metadata, both as multipart).""" | """Create a new deposit (archive, metadata, both as multipart).""" | ||||
if archive_path and not metadata_path: | if archive and not metadata: | ||||
return CreateArchiveDepositClient(self.config).execute( | return CreateArchiveDepositClient(self.config).execute( | ||||
collection, archive_path, in_progress, slug) | collection, archive, in_progress, slug) | ||||
elif not archive_path and metadata_path: | elif not archive and metadata: | ||||
return CreateMetadataDepositClient(self.config).execute( | return CreateMetadataDepositClient(self.config).execute( | ||||
collection, metadata_path, in_progress, slug, | collection, metadata, in_progress, slug, | ||||
is_archive=False) | is_archive=False) | ||||
else: | else: | ||||
return CreateMultipartDepositClient(self.config).execute( | return CreateMultipartDepositClient(self.config).execute( | ||||
collection, archive_path, metadata_path, in_progress, | collection, archive, metadata, in_progress, | ||||
slug) | slug) | ||||
def deposit_update(self, collection, deposit_id, slug, archive_path=None, | def deposit_update(self, collection, deposit_id, slug, archive=None, | ||||
metadata_path=None, in_progress=False, | metadata=None, in_progress=False, | ||||
replace=False, log=None): | replace=False): | ||||
"""Update (add/replace) existing deposit (archive, metadata, both).""" | """Update (add/replace) existing deposit (archive, metadata, both).""" | ||||
r = self.deposit_status(collection, deposit_id, log=log) | r = self.deposit_status(collection, deposit_id) | ||||
if 'error' in r: | if 'error' in r: | ||||
return r | return r | ||||
status = r['deposit_status'] | status = r['deposit_status'] | ||||
if status != 'partial': | if status != 'partial': | ||||
return { | return { | ||||
'error': "You can only act on deposit with status 'partial'", | 'error': "You can only act on deposit with status 'partial'", | ||||
'detail': "The deposit %s has status '%s'" % ( | 'detail': "The deposit %s has status '%s'" % ( | ||||
deposit_id, status), | deposit_id, status), | ||||
'deposit_status': status, | 'deposit_status': status, | ||||
'deposit_id': deposit_id, | 'deposit_id': deposit_id, | ||||
} | } | ||||
if archive_path and not metadata_path: | if archive and not metadata: | ||||
r = UpdateArchiveDepositClient(self.config).execute( | r = UpdateArchiveDepositClient(self.config).execute( | ||||
collection, archive_path, in_progress, slug, | collection, archive, in_progress, slug, | ||||
deposit_id=deposit_id, replace=replace, log=log) | deposit_id=deposit_id, replace=replace) | ||||
elif not archive_path and metadata_path: | elif not archive and metadata: | ||||
r = UpdateMetadataDepositClient(self.config).execute( | r = UpdateMetadataDepositClient(self.config).execute( | ||||
collection, metadata_path, in_progress, slug, | collection, metadata, in_progress, slug, | ||||
deposit_id=deposit_id, replace=replace, log=log) | deposit_id=deposit_id, replace=replace) | ||||
else: | else: | ||||
r = UpdateMultipartDepositClient(self.config).execute( | r = UpdateMultipartDepositClient(self.config).execute( | ||||
collection, archive_path, metadata_path, in_progress, | collection, archive, metadata, in_progress, | ||||
slug, deposit_id=deposit_id, replace=replace, log=log) | slug, deposit_id=deposit_id, replace=replace) | ||||
if 'error' in r: | if 'error' in r: | ||||
return r | return r | ||||
return self.deposit_status(collection, deposit_id, log=log) | return self.deposit_status(collection, deposit_id) |