diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py --- a/swh/web/admin/deposit.py +++ b/swh/web/admin/deposit.py @@ -3,22 +3,18 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -import sentry_sdk + +import requests +from requests.auth import HTTPBasicAuth from django.conf import settings from django.contrib.auth.decorators import user_passes_test -from django.core.paginator import Paginator from django.http import JsonResponse from django.shortcuts import render from swh.web.admin.adminurls import admin_route from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION -from swh.web.common.utils import ( - get_deposit_raw_metadata, - get_deposits_list, - parse_swh_deposit_origin, - parse_swh_metadata_provenance, -) +from swh.web.config import get_config def _can_list_deposits(user): @@ -34,102 +30,15 @@ @admin_route(r"deposit/list/", view_name="admin-deposit-list") @user_passes_test(_can_list_deposits, login_url=settings.LOGIN_URL) def _admin_deposit_list(request): - table_data = {} - table_data["draw"] = int(request.GET["draw"]) - try: - deposits = get_deposits_list(request.GET.get("username")) - deposits_count = len(deposits) - search_value = request.GET["search[value]"] - if search_value: - deposits = [ - d - for d in deposits - if any( - search_value.lower() in val - for val in [str(v).lower() for v in d.values()] - ) - ] - - exclude_pattern = request.GET.get("excludePattern") - if exclude_pattern: - deposits = [ - d - for d in deposits - if all( - exclude_pattern.lower() not in val - for val in [str(v).lower() for v in d.values()] - ) - ] - - column_order = request.GET["order[0][column]"] - field_order = request.GET["columns[%s][name]" % column_order] - order_dir = request.GET["order[0][dir]"] - - deposits = sorted(deposits, key=lambda d: d[field_order] or "") - if order_dir == "desc": - deposits = list(reversed(deposits)) - - length = int(request.GET["length"]) - page = int(request.GET["start"]) / length + 1 - paginator = Paginator(deposits, length) - data = paginator.page(page).object_list - table_data["recordsTotal"] = deposits_count - table_data["recordsFiltered"] = len(deposits) - data_list = [] - for d in data: - data_dict = { - "id": d["id"], - "type": d["type"], - "external_id": d["external_id"], - "reception_date": d["reception_date"], - "status": d["status"], - "status_detail": d["status_detail"], - "swhid": d["swhid"], - "swhid_context": d["swhid_context"], - } - provenance = None - raw_metadata = d["raw_metadata"] - # for meta deposit, the uri should be the url provenance - if raw_metadata and d["type"] == "meta": # metadata provenance - provenance = parse_swh_metadata_provenance(d["raw_metadata"]) - # For code deposits the uri is the origin - # First, trying to determine it out of the raw metadata associated with the - # deposit - elif raw_metadata and d["type"] == "code": - provenance = parse_swh_deposit_origin(raw_metadata) - - # For code deposits, if not provided, use the origin_url - if not provenance and d["type"] == "code": - if d["origin_url"]: - provenance = d["origin_url"] - - # If still not found, fallback using the swhid context - if not provenance and d["swhid_context"]: - # Trying to compute the origin as we did before in the js - from swh.model.swhids import QualifiedSWHID - - swhid = QualifiedSWHID.from_string(d["swhid_context"]) - provenance = swhid.origin - - data_dict["uri"] = provenance # could be None - - # This could be large. As this is not displayed yet, drop it to avoid - # cluttering the data dict - data_dict.pop("raw_metadata", None) - - data_list.append(data_dict) - - table_data["data"] = data_list - - for row in table_data["data"]: - metadata = get_deposit_raw_metadata(row["id"]) - if metadata: - row["raw_metadata"] = metadata - else: - row["raw_metadata"] = None - - except Exception as exc: - sentry_sdk.capture_exception(exc) - table_data["error"] = f"Could not retrieve deposits: {exc!r}" - - return JsonResponse(table_data) + config = get_config()["deposit"] + private_api_url = config["private_api_url"].rstrip("/") + "/" + deposits_list_url = private_api_url + "deposits/datatables/" + deposits_list_auth = HTTPBasicAuth( + config["private_api_user"], config["private_api_password"] + ) + + deposits = requests.get( + deposits_list_url, auth=deposits_list_auth, params=request.GET, timeout=30 + ).json() + + return JsonResponse(deposits) diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -9,7 +9,6 @@ import re from typing import Any, Callable, Dict, List, Optional import urllib.parse -from xml.etree import ElementTree from bs4 import BeautifulSoup from docutils.core import publish_parts @@ -461,7 +460,8 @@ def get_deposits_list(username: Optional[str] = None) -> List[Dict[str, Any]]: """Return the list of software deposits using swh-deposit API""" config = get_config()["deposit"] - deposits_list_base_url = config["private_api_url"] + "deposits" + private_api_url = config["private_api_url"].rstrip("/") + "/" + deposits_list_base_url = private_api_url + "deposits" deposits_list_auth = HTTPBasicAuth( config["private_api_user"], config["private_api_password"] ) @@ -490,13 +490,6 @@ return deposits_data["results"] -@django_cache() -def get_deposit_raw_metadata(deposit_id: int) -> Optional[str]: - config = get_config()["deposit"] - url = f"{config['private_api_url']}/{deposit_id}/meta" - return requests.get(url).json()["raw_metadata"] - - _origin_visit_types_cache_timeout = 24 * 60 * 60 # 24 hours @@ -525,73 +518,6 @@ ) -NAMESPACES = { - "swh": "https://www.softwareheritage.org/schema/2018/deposit", - "schema": "http://schema.org/", -} - - -def parse_swh_metadata_provenance(raw_metadata: str) -> Optional[str]: - """Parse swh metadata-provenance out of the raw metadata deposit. If found, returns the - value, None otherwise. - - .. code-block:: xml - - - - https://example.org/metadata/url - - - - Args: - raw_metadata: raw metadata out of deposits received - - Returns: - Either the metadata provenance url if any or None otherwise - - """ - metadata = ElementTree.fromstring(raw_metadata) - url = metadata.findtext( - "swh:deposit/swh:metadata-provenance/schema:url", - namespaces=NAMESPACES, - ) - return url or None - - -def parse_swh_deposit_origin(raw_metadata: str) -> Optional[str]: - """Parses and from metadata document, - if any. They are mutually exclusive and tested as such in the deposit. - - .. code-block:: xml - - - - - - - - .. code-block:: xml - - - - - - - - Returns: - The one not null if any, None otherwise - - """ - metadata = ElementTree.fromstring(raw_metadata) - for origin_tag in ["create_origin", "add_to_origin"]: - elt = metadata.find( - f"swh:deposit/swh:{origin_tag}/swh:origin[@url]", namespaces=NAMESPACES - ) - if elt is not None: - return elt.attrib["url"] - return None - - def has_add_forge_now_permission(user) -> bool: """Is a user considered an add-forge-now moderator? diff --git a/swh/web/tests/admin/test_deposit.py b/swh/web/tests/admin/test_deposit.py --- a/swh/web/tests/admin/test_deposit.py +++ b/swh/web/tests/admin/test_deposit.py @@ -1,13 +1,20 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from base64 import b64encode + import pytest from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION from swh.web.common.utils import reverse -from swh.web.tests.utils import check_html_get_response, create_django_permission +from swh.web.config import get_config +from swh.web.tests.utils import ( + check_html_get_response, + check_http_get_response, + create_django_permission, +) def test_deposit_admin_view_not_available_for_anonymous_user(client): @@ -35,3 +42,60 @@ check_html_get_response( client, url, status_code=200, template_used="admin/deposit.html" ) + + +@pytest.mark.django_db +def test_deposit_admin_view_list_deposits(client, staff_user, requests_mock): + deposits_data = { + "data": [ + { + "external_id": "hal-02527986", + "id": 1066, + "raw_metadata": None, + "reception_date": "2022-04-08T14:12:34.143000Z", + "status": "rejected", + "status_detail": None, + "swhid": None, + "swhid_context": None, + "type": "code", + "uri": "https://inria.halpreprod.archives-ouvertes.fr/hal-02527986", + }, + { + "external_id": "hal-01243573", + "id": 1065, + "raw_metadata": None, + "reception_date": "2022-04-08T12:53:50.940000Z", + "status": "rejected", + "status_detail": None, + "swhid": None, + "swhid_context": None, + "type": "code", + "uri": "https://inria.halpreprod.archives-ouvertes.fr/hal-01243573", + }, + ], + "draw": 2, + "recordsFiltered": 645, + "recordsTotal": 1066, + } + + config = get_config()["deposit"] + private_api_url = config["private_api_url"].rstrip("/") + "/" + deposits_list_url = private_api_url + "deposits/datatables/" + + basic_auth_payload = ( + config["private_api_user"] + ":" + config["private_api_password"] + ).encode() + + requests_mock.get( + deposits_list_url, + json=deposits_data, + request_headers={ + "Authorization": f"Basic {b64encode(basic_auth_payload).decode('ascii')}" + }, + ) + + client.force_login(staff_user) + url = reverse("admin-deposit-list") + check_http_get_response( + client, url, status_code=200, content_type="application/json" + ) diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py --- a/swh/web/tests/common/test_utils.py +++ b/swh/web/tests/common/test_utils.py @@ -5,7 +5,6 @@ from base64 import b64encode import datetime import math -from os.path import join import sys from urllib.parse import quote @@ -272,7 +271,8 @@ } config = get_config()["deposit"] - deposits_list_url = config["private_api_url"] + "deposits" + private_api_url = config["private_api_url"].rstrip("/") + "/" + deposits_list_url = private_api_url + "deposits" basic_auth_payload = ( config["private_api_user"] + ":" + config["private_api_password"] @@ -318,47 +318,6 @@ assert utils.is_swh_web_production(request) -@pytest.mark.parametrize( - "raw_metadata_file,expected_url", - [ - ("raw-metadata-provenance.xml", "https://example.org/metadata/provenance"), - ("raw-metadata-no-swh.xml", None), - ], -) -def test_parse_swh_provenance(datadir, raw_metadata_file, expected_url): - metadata_path = join(datadir, "deposit", raw_metadata_file) - with open(metadata_path, "r") as f: - raw_metadata = f.read() - - actual_url = utils.parse_swh_metadata_provenance(raw_metadata) - - assert actual_url == expected_url - - -@pytest.mark.parametrize( - "raw_metadata_file,expected_url", - [ - ( - "raw-metadata-create-origin.xml", - "https://example.org/metadata/create-origin", - ), - ( - "raw-metadata-add-to-origin.xml", - "https://example.org/metadata/add-to-origin", - ), - ("raw-metadata-no-swh.xml", None), - ], -) -def test_parse_swh_origins(datadir, raw_metadata_file, expected_url): - metadata_path = join(datadir, "deposit", raw_metadata_file) - with open(metadata_path, "r") as f: - raw_metadata = f.read() - - actual_url = utils.parse_swh_deposit_origin(raw_metadata) - - assert actual_url == expected_url - - def add(x, y): return x + y