diff --git a/assets/src/bundles/admin/deposit.js b/assets/src/bundles/admin/deposit.js --- a/assets/src/bundles/admin/deposit.js +++ b/assets/src/bundles/admin/deposit.js @@ -1,17 +1,22 @@ /** - * Copyright (C) 2018-2021 The Software Heritage developers + * Copyright (C) 2018-2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ function genSwhLink(data, type) { - if (type === 'display') { - if (data && data.startsWith('swh')) { - const browseUrl = Urls.browse_swhid(data); - const formattedSWHID = data.replace(/;/g, ';
'); - return `${formattedSWHID}`; - } + if (type === 'display' && data && data.startsWith('swh')) { + const browseUrl = Urls.browse_swhid(data); + const formattedSWHID = data.replace(/;/g, ';
'); + return `${formattedSWHID}`; + } + return data; +} + +function genLink(data, type) { + if (type === 'display' && data) { + return `${data}`; } return data; } @@ -55,23 +60,14 @@ name: 'id' }, { - data: 'swhid_context', - name: 'swhid_context', + data: 'type', + name: 'type' + }, + { + data: 'uri', + name: 'uri', render: (data, type, row) => { - if (data && type === 'display') { - const originPattern = ';origin='; - const originPatternIdx = data.indexOf(originPattern); - if (originPatternIdx !== -1) { - let originUrl = data.slice(originPatternIdx + originPattern.length); - const nextSepPattern = ';'; - const nextSepPatternIdx = originUrl.indexOf(nextSepPattern); - if (nextSepPatternIdx !== -1) { /* Remove extra context */ - originUrl = originUrl.slice(0, nextSepPatternIdx); - } - return `${originUrl}`; - } - } - return data; + return genLink(data, type); } }, { diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py --- a/swh/web/admin/deposit.py +++ b/swh/web/admin/deposit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -13,7 +13,11 @@ from swh.web.admin.adminurls import admin_route from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION -from swh.web.common.utils import get_deposits_list +from swh.web.common.utils import ( + get_deposits_list, + parse_swh_deposit_origin, + parse_swh_metadata_provenance, +) def _can_list_deposits(user): @@ -70,9 +74,11 @@ data = paginator.page(page).object_list table_data["recordsTotal"] = deposits_count table_data["recordsFiltered"] = len(deposits) - table_data["data"] = [ - { + data_list = [] + for d in data: + data_dict = { "id": d["id"], + "type": d["type"], "external_id": d["external_id"], "reception_date": 
d["reception_date"],
                 "status": d["status"],
@@ -80,13 +86,36 @@
                 "swhid": d["swhid"],
                 "swhid_context": d["swhid_context"],
             }
-            for d in data
-        ]
+            # Resolve the URI to display, trying the most specific source first
+            provenance = None
+            raw_metadata = d["raw_metadata"]
+            if raw_metadata and d["type"] == "meta":  # metadata provenance
+                provenance = parse_swh_metadata_provenance(raw_metadata)
+            elif raw_metadata and d["type"] == "code":  # declared origin
+                provenance = parse_swh_deposit_origin(raw_metadata)
+
+            if not provenance and d["origin_url"]:
+                provenance = d["origin_url"]
+
+            # Finally, if still not found, we determine uri using the swhid
+            if not provenance and d["swhid_context"]:
+                # Trying to compute the origin as we did before in the js
+                from swh.model.swhids import QualifiedSWHID
+
+                swhid = QualifiedSWHID.from_string(d["swhid_context"])
+                provenance = swhid.origin
+
+            # raw_metadata is deliberately not copied into data_dict: it can be
+            # large and is not displayed
+            data_dict["uri"] = provenance  # could be None
+            data_list.append(data_dict)
+
+        table_data["data"] = data_list
     except Exception as exc:
         sentry_sdk.capture_exception(exc)
-        table_data["error"] = (
-            "An error occurred while retrieving " "the list of deposits !"
-        )
+        table_data[
+            "error"
+        ] = "An error occurred while retrieving the list of deposits !"
 
return JsonResponse(table_data) diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,6 +8,7 @@ import re from typing import Any, Dict, List, Optional import urllib.parse +from xml.etree import ElementTree from bs4 import BeautifulSoup from docutils.core import publish_parts @@ -447,3 +448,79 @@ request_path = resolve(request.path_info) args = {**request_path.kwargs, **request.GET.dict()} return redirect(reverse(new_route, query_params=args), permanent=permanent,) + + +NAMESPACES = { + "atom": "http://www.w3.org/2005/Atom", + "app": "http://www.w3.org/2007/app", + "dc": "http://purl.org/dc/terms/", + "codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + "sword": "http://purl.org/net/sword/terms/", + "swh": "https://www.softwareheritage.org/schema/2018/deposit", + "schema": "http://schema.org/", +} + + +def parse_swh_metadata_provenance(raw_metadata: str) -> Optional[str]: + """Parse swh metadata-provenance out of the raw metadata deposit. If found, returns the + value, None otherwise. + + .. code-block:: xml + + + + https://example.org/metadata/url + + + + Args: + raw_metadata: raw metadata out of deposits received + + Returns: + Either the metadata provenance url if any or None otherwise + + """ + metadata = ElementTree.fromstring(raw_metadata) + url_element = metadata.find( + "swh:deposit/swh:metadata-provenance/schema:url", namespaces=NAMESPACES, + ) + return url_element.text if url_element is not None else None + + +def parse_swh_deposit_origin(raw_metadata: str) -> Optional[str]: + """Parses and from metadata document, + if any. + + .. 
code-block:: xml + + + + + + + + .. code-block:: xml + + + + + + + + Returns: + The one not null if any + + """ + metadata = ElementTree.fromstring(raw_metadata) + create_origin = metadata.find( + "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES + ) + if create_origin is not None: + return create_origin.text + + add_to_origin = metadata.find( + "swh:deposit/swh:add_to_origin/swh:origin", namespaces=NAMESPACES + ) + if add_to_origin is not None: + return add_to_origin.text + return None diff --git a/swh/web/templates/admin/deposit.html b/swh/web/templates/admin/deposit.html --- a/swh/web/templates/admin/deposit.html +++ b/swh/web/templates/admin/deposit.html @@ -30,19 +30,21 @@
Toggle column: id - - origin - - reception date - - status - - status detail - - directory - - directory with context + type - + uri - + reception date - + status - + status detail - + directory - + directory with context

- + +
idorigintypeuri reception date status status detail