def _enrich_deposit_with_metadata(deposit: "Deposit") -> "Deposit":
    """Attach the most recent raw metadata to ``deposit``, when there is some.

    The deposit requests of type ``"metadata"`` linked to the deposit are
    ordered by decreasing id and only the first one (the newest) is looked at.
    If it carries a non-empty ``raw_metadata``, that value is handed to
    ``deposit.set_raw_metadata``.

    Args:
        deposit: the deposit to enrich; it is modified in place.

    Returns:
        The very same ``deposit`` object, so that the call can be chained.
    """
    # ``first()`` asks the database for a single row (or yields None) instead
    # of loading every metadata request just to read the newest one.
    latest_request = (
        deposit.depositrequest_set.filter(type="metadata").order_by("-id").first()
    )
    if latest_request is not None and latest_request.raw_metadata:
        deposit.set_raw_metadata(latest_request.raw_metadata)
    return deposit
def _deposit_search_query(search_value: str) -> Q:
    """Build a query matching ``search_value`` in any textual deposit field.

    Every ``CharField`` / ``TextField`` declared on the ``Deposit`` model
    contributes a case-insensitive containment test (``__icontains``); the
    tests are OR-ed together.

    Args:
        search_value: the text to look for.

    Returns:
        The combined ``Q`` object (an empty ``Q()`` when the model declares no
        textual field).
    """
    search_query = Q()
    for field in Deposit._meta.fields:
        if isinstance(field, (CharField, TextField)):
            search_query |= Q(**{field.name + "__icontains": search_value})
    return search_query


def _deposit_provenance(d: Dict[str, Any]) -> Any:
    """Work out the ``uri`` (provenance) shown for one serialized deposit.

    Resolution order:

    1. for ``"meta"`` deposits with raw metadata, the metadata provenance
       parsed out of that metadata;
    2. for ``"code"`` deposits with raw metadata, the create-origin url or,
       failing that, the add-to-origin url parsed out of that metadata;
    3. for ``"code"`` deposits still without a value, the ``origin_url``;
    4. finally, the origin of the qualified SWHID in ``swhid_context``.

    Args:
        d: serialized deposit data (as produced by ``DepositSerializer``).

    Returns:
        The provenance found, or a falsy value (usually ``None``) when no
        source yields one.

    Raises:
        Whatever the XML or SWHID parsers raise on malformed input; the caller
        reports it as an error for the whole listing.
    """
    raw_metadata = d["raw_metadata"]
    deposit_type = d["type"]
    provenance = None

    if raw_metadata and deposit_type == "meta":
        provenance = parse_swh_metadata_provenance(
            ElementTree.fromstring(raw_metadata)
        )
    elif raw_metadata and deposit_type == "code":
        create_origin_url, add_to_origin_url = parse_swh_deposit_origin(
            ElementTree.fromstring(raw_metadata)
        )
        provenance = create_origin_url or add_to_origin_url

    # For code deposits, if nothing came out of the metadata, use origin_url
    if not provenance and deposit_type == "code":
        if d["origin_url"]:
            provenance = d["origin_url"]

    # If still not found, fall back to the origin of the swhid context
    if not provenance and d["swhid_context"]:
        provenance = QualifiedSWHID.from_string(d["swhid_context"]).origin

    return provenance


# NOTE(review): the view is registered without authentication and with
# AllowAny; presumably access is restricted at deployment level -- confirm.
@api_view()
@authentication_classes([])
@permission_classes([AllowAny])
def deposit_list_datatables(request: Request) -> JsonResponse:
    """Special API view to list and filter deposits, produced responses are intended
    to be consumed by datatables js framework used in deposits admin Web UI.

    Query parameters read from ``request.GET``:

    - ``draw``: integer echoed back in the response (default 1);
    - ``username``: only list the deposits of that client;
    - ``search[value]``: keep deposits with a textual field containing it;
    - ``excludePattern``: drop deposits with a textual field containing it;
    - ``order[0][column]``, ``columns[<i>][name]``, ``order[0][dir]``: sort
      field and direction (default: decreasing ``id``);
    - ``start``, ``length``: offset and page size (default 0 and 10); a
      ``length`` lower than 1 returns every matching deposit, which is how
      datatables asks for "all records" (``-1``).

    Returns:
        A JSON response holding ``draw``, ``recordsTotal``,
        ``recordsFiltered`` and ``data``. When anything goes wrong (including
        a malformed parameter) the exception is sent to sentry and the
        response carries an ``error`` message instead; the exception text is
        appended to it when ``settings.DEBUG`` is set.
    """
    table_data: Dict[str, Any] = {}
    try:
        params = request.GET
        # parsed inside the try block so that a malformed value ends up in the
        # JSON error payload like every other failure
        table_data["draw"] = int(params.get("draw", 1))

        username = params.get("username")
        if username:
            deposits = Deposit.objects.select_related("client").filter(
                client__username=username
            )
        else:
            deposits = Deposit.objects.all()

        # total is computed before the search / exclusion filters are applied
        records_total = deposits.count()

        search_value = params.get("search[value]")
        if search_value:
            deposits = deposits.filter(_deposit_search_query(search_value))

        exclude_pattern = params.get("excludePattern")
        if exclude_pattern:
            deposits = deposits.exclude(_deposit_search_query(exclude_pattern))

        column_order = params.get("order[0][column]")
        # an unnamed column comes through as an empty name: sort by id then
        field_order = params.get("columns[%s][name]" % column_order) or "id"
        if params.get("order[0][dir]", "desc") == "desc":
            field_order = "-" + field_order
        deposits = deposits.order_by(field_order)

        length = int(params.get("length", 10))
        start = int(params.get("start", 0))
        if length > 0:
            paginator = Paginator(deposits, length)
            page_deposits = paginator.page(start // length + 1).object_list
            # the paginator caches the count it already needed for the page
            records_filtered = paginator.count
        else:
            # no usable page size (datatables sends -1 for "all records")
            page_deposits = deposits
            records_filtered = deposits.count()

        rows = []
        for deposit in page_deposits:
            d = DepositSerializer(_enrich_deposit_with_metadata(deposit)).data
            rows.append(
                {
                    "id": d["id"],
                    "type": d["type"],
                    "external_id": d["external_id"],
                    "raw_metadata": d["raw_metadata"],
                    "reception_date": d["reception_date"],
                    "status": d["status"],
                    "status_detail": d["status_detail"],
                    "swhid": d["swhid"],
                    "swhid_context": d["swhid_context"],
                    "uri": _deposit_provenance(d),  # could be None
                }
            )

        table_data["recordsTotal"] = records_total
        table_data["recordsFiltered"] = records_filtered
        table_data["data"] = rows

    except Exception as exc:
        sentry_sdk.capture_exception(exc)
        table_data[
            "error"
        ] = "An error occurred while retrieving the list of deposits !"
        if settings.DEBUG:
            table_data["error"] += "\n" + str(exc)

    return JsonResponse(table_data)
@pytest.fixture()
def partial_deposit_only_metadata(partial_deposit_only_metadata):
    """Same-named fixture, with the deposit saved as a metadata-only one."""
    deposit = partial_deposit_only_metadata
    deposit.type = DEPOSIT_METADATA_ONLY
    deposit.save()
    return deposit


@pytest.fixture()
def deposits(
    partial_deposit_with_metadata,
    partial_deposit_only_metadata,
    partial_deposit,
    completed_deposit,
    complete_deposit,
):
    """The five deposits listed by the datatables tests."""
    # to cover code extracting deposit provenance from swhid_context
    complete_deposit.origin_url = None
    complete_deposit.save()

    return [
        partial_deposit_with_metadata,
        partial_deposit_only_metadata,
        partial_deposit,
        completed_deposit,
        complete_deposit,
    ]


def _datatables_url(params=None):
    """Return the datatables endpoint url, with ``params`` as query string if any."""
    url = reverse(PRIVATE_LIST_DEPOSITS_DATATABLES)
    if not params:
        return url
    query_params = QueryDict(mutable=True)
    query_params.update(params)
    return url + "?" + query_params.urlencode()


@pytest.mark.django_db(transaction=True, reset_sequences=True)
def test_deposit_list_datatables_empty_query(
    deposits,
    authenticated_client,
):
    """Without parameters, every deposit is returned, highest id first."""
    response = authenticated_client.get(_datatables_url()).json()
    expected_count = len(deposits)

    assert response["draw"] == 1
    assert response["recordsTotal"] == expected_count
    assert response["recordsFiltered"] == expected_count
    assert len(response["data"]) == expected_count
    # by default, deposits are sorted by decreasing ids
    returned_ids = [row["id"] for row in response["data"]]
    assert returned_ids == sorted((d.id for d in deposits), reverse=True)


@pytest.mark.django_db(transaction=True, reset_sequences=True)
@pytest.mark.parametrize("sort_direction", ["asc", "desc"])
def test_deposit_list_datatables_ordering(
    deposits,
    authenticated_client,
    sort_direction,
):
    """Rows follow the requested column, in the requested direction."""
    deposits_date_sorted = sorted(deposits, key=lambda d: d.reception_date)
    if sort_direction == "desc":
        deposits_date_sorted.reverse()

    url = _datatables_url(
        {
            "draw": 1,
            "length": 10,
            "start": 0,
            "order[0][column]": 4,
            "order[0][dir]": sort_direction,
            "columns[4][name]": "reception_date",
        }
    )
    response = authenticated_client.get(url).json()

    expected_dates = []
    for deposit in deposits_date_sorted:
        iso_date = deposit.reception_date.isoformat()
        expected_dates.append(iso_date.replace("+00:00", "Z"))

    assert [row["reception_date"] for row in response["data"]] == expected_dates


@pytest.mark.django_db(transaction=True, reset_sequences=True)
def test_deposit_list_datatables_search(
    deposits,
    authenticated_client,
):
    """Searching for a status only returns the deposits having that status."""
    url = _datatables_url(
        {
            "draw": 1,
            "length": 10,
            "start": 0,
            "search[value]": DEPOSIT_STATUS_LOAD_SUCCESS,
        }
    )
    response = authenticated_client.get(url).json()

    expected_ids = sorted(
        (d.id for d in deposits if d.status == DEPOSIT_STATUS_LOAD_SUCCESS),
        reverse=True,
    )

    assert expected_ids
    assert expected_ids == [row["id"] for row in response["data"]]


@pytest.mark.django_db(transaction=True, reset_sequences=True)
def test_deposit_list_datatables_exclude_pattern(
    deposits,
    authenticated_client,
):
    """Excluding a status returns every deposit but those having that status."""
    url = _datatables_url(
        {
            "draw": 1,
            "length": 10,
            "start": 0,
            "excludePattern": DEPOSIT_STATUS_LOAD_SUCCESS,
        }
    )
    response = authenticated_client.get(url).json()

    expected_ids = sorted(
        (d.id for d in deposits if d.status != DEPOSIT_STATUS_LOAD_SUCCESS),
        reverse=True,
    )

    assert expected_ids
    assert expected_ids == [row["id"] for row in response["data"]]


@pytest.mark.django_db(transaction=True, reset_sequences=True)
def test_deposit_list_datatables_username(
    completed_deposit,
    deposit_user,
    deposit_another_user,
    deposit_another_collection,
    authenticated_client,
):
    """Filtering on a username only lists the deposits of that user."""
    # create a new deposit with a user different from deposit_user,
    # the one that created completed_deposit
    completed_deposit_another_user = internal_create_deposit(
        client=deposit_another_user,
        collection=deposit_another_collection,
        external_id="external-id-bar",
        status=DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    expected_by_user = (
        (deposit_user, completed_deposit),
        (deposit_another_user, completed_deposit_another_user),
    )
    for user, deposit in expected_by_user:
        url = _datatables_url({"username": user.username})
        rows = authenticated_client.get(url).json()["data"]

        assert len(rows) == 1
        assert rows[0]["id"] == deposit.id


@pytest.mark.django_db(transaction=True, reset_sequences=True)
@override_settings(DEBUG=True)
def test_deposit_list_datatables_error(
    deposits,
    authenticated_client,
    mocker,
):
    """A failure while building the rows is reported in the ``error`` entry."""
    error_message = "Error when parsing metadata"
    mocker.patch(
        "swh.deposit.api.private.deposit_list.parse_swh_metadata_provenance"
    ).side_effect = Exception(error_message)

    response = authenticated_client.get(_datatables_url()).json()

    assert "error" in response
    assert error_message in response["error"]