diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -19,7 +19,6 @@
                python3-vcversioner,
                python3-djangorestframework,
                python3-requests,
-               python3-lxml,
                python3-xmltodict,
                patool
 Standards-Version: 3.9.6
@@ -39,7 +38,7 @@
 Depends: python3-swh.core (>= 0.0.36~),
          python3-swh.model (>= 0.0.25~),
          python3-requests,
-         python3-lxml,
+         python3-xmltodict,
          ${misc:Depends},
          ${python3:Depends}
 Description: Software Heritage Deposit Api Client
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,4 @@
 click
 Django
 djangorestframework
-lxml
 xmltodict
diff --git a/swh/deposit/client/__init__.py b/swh/deposit/client/__init__.py
--- a/swh/deposit/client/__init__.py
+++ b/swh/deposit/client/__init__.py
@@ -10,13 +10,54 @@
 import hashlib
 import os
 import requests
+import xmltodict
 
 from abc import ABCMeta, abstractmethod
-from lxml import etree
 
 from swh.core.config import SWHConfig
 
 
+def _parse(stream, encoding='utf-8'):
+    """Parse an xml stream into a dict.
+
+    Args:
+        stream (bytes/text): The stream to parse
+        encoding (str): The encoding used to decode the stream when it
+            is given as bytes
+
+    Returns:
+        The parsed xml as a dict (the content of 'entry' when present)
+
+    """
+    if isinstance(stream, bytes):
+        stream = stream.decode(encoding)
+    data = xmltodict.parse(stream, encoding=encoding, process_namespaces=False)
+    if 'entry' in data:
+        data = data['entry']
+    return dict(data)
+
+
+def _parse_with_filter(stream, encoding='utf-8', keys=()):
+    """Parse an xml stream and keep only the requested keys.
+
+    Args:
+        stream (bytes/text): The stream to parse
+        encoding (str): The encoding used to decode the stream when it
+            is given as bytes
+        keys ([str]): Keys to filter the parsed result
+
+    Returns:
+        A dict holding exactly the provided keys; a key missing from
+        the parsed xml is mapped to None.
+
+    """
+    data = _parse(stream, encoding=encoding)
+    m = {}
+    for key in keys:
+        m[key] = data.get(key)
+    return m
+
+
 class BaseApiDepositClient(SWHConfig):
     """Deposit client base class
 
@@ -208,24 +249,7 @@
                 'detail': Some more detail about the error if any
 
         """
-        tree = etree.fromstring(xml_content.encode('utf-8'))
-        vals = tree.xpath('/x:error/y:summary', namespaces={
-            'x': 'http://purl.org/net/sword/',
-            'y': 'http://www.w3.org/2005/Atom'
-        })
-        summary = vals[0].text
-        if summary:
-            summary = summary.strip()
-
-        vals = tree.xpath(
-            '/x:error/x:verboseDescription',
-            namespaces={'x': 'http://purl.org/net/sword/'})
-        if vals:
-            detail = vals[0].text.strip()
-        else:
-            detail = None
-
-        return {'error': summary, 'detail': detail}
+        return _parse_with_filter(xml_content, keys=['summary', 'detail'])
 
     def do_execute(self, method, url, info):
         """Execute the http query to url using method and info information.
@@ -289,19 +313,7 @@
         """Parse service document's success response.
 
         """
-        tree = etree.fromstring(xml_content.encode('utf-8'))
-        collections = tree.xpath(
-            '/x:service/x:workspace/x:collection/y:name',
-            namespaces={'x': 'http://www.w3.org/2007/app',
-                        'y': 'http://purl.org/net/sword/terms/'})
-        if collections:
-            collection = collections[0].text
-        else:
-            collection = None
-
-        return {
-            'collection': collection
-        }
+        return _parse_with_filter(xml_content, keys=['collection'])
 
 
 class StatusDepositClient(BaseDepositClient):
@@ -327,63 +339,14 @@
         """Given an xml content as string, returns a deposit dict.
 
         """
-        tree = etree.fromstring(xml_content.encode('utf-8'))
-        vals = tree.xpath(
-            '/x:entry/x:deposit_id',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        deposit_id = vals[0].text
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_status',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        deposit_status = vals[0].text
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_status_detail',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        deposit_status_detail = vals[0].text
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_swh_id',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        if vals:
-            deposit_swh_id = vals[0].text
-        else:
-            deposit_swh_id = None
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_swh_id_context',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        if vals:
-            deposit_swh_id_context = vals[0].text
-        else:
-            deposit_swh_id_context = None
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_swh_anchor_id',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        if vals:
-            deposit_swh_anchor_id = vals[0].text
-        else:
-            deposit_swh_anchor_id = None
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_swh_anchor_id_context',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        if vals:
-            deposit_swh_anchor_id_context = vals[0].text
-        else:
-            deposit_swh_anchor_id_context = None
-
-        return {
-            'deposit_id': deposit_id,
-            'deposit_status': deposit_status,
-            'deposit_status_detail': deposit_status_detail,
-            'deposit_swh_id': deposit_swh_id,
-            'deposit_swh_id_context': deposit_swh_id_context,
-            'deposit_swh_anchor_id': deposit_swh_anchor_id,
-            'deposit_swh_anchor_id_context': deposit_swh_anchor_id_context,
-        }
+        return _parse_with_filter(xml_content, keys=[
+            'deposit_id',
+            'deposit_status',
+            'deposit_status_detail',
+            'deposit_swh_id',
+            'deposit_swh_id_context',
+            'deposit_swh_anchor_id',
+            'deposit_swh_anchor_id_context'])
 
 
 class BaseCreateDepositClient(BaseDepositClient):
@@ -408,27 +371,10 @@
         """Given an xml content as string, returns a deposit dict.
 
         """
-        tree = etree.fromstring(xml_content.encode('utf-8'))
-        vals = tree.xpath(
-            '/x:entry/x:deposit_id',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        deposit_id = vals[0].text
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_status',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        deposit_status = vals[0].text
-
-        vals = tree.xpath(
-            '/x:entry/x:deposit_date',
-            namespaces={'x': 'http://www.w3.org/2005/Atom'})
-        deposit_date = vals[0].text
-
-        return {
-            'deposit_id': deposit_id,
-            'deposit_status': deposit_status,
-            'deposit_date': deposit_date,
-        }
+        return _parse_with_filter(xml_content, keys=['deposit_id',
+                                                     'deposit_status',
+                                                     'deposit_status_detail',
+                                                     'deposit_date'])
 
     def _compute_information(self, collection, filepath, in_progress,
                              slug, is_archive=True):