diff --git a/assets/src/bundles/admin/deposit.js b/assets/src/bundles/admin/deposit.js
index 7539f41a..7d7f98d4 100644
--- a/assets/src/bundles/admin/deposit.js
+++ b/assets/src/bundles/admin/deposit.js
@@ -1,166 +1,163 @@
/**
- * Copyright (C) 2018-2021 The Software Heritage developers
+ * Copyright (C) 2018-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
function genSwhLink(data, type) {
- if (type === 'display') {
- if (data && data.startsWith('swh')) {
- const browseUrl = Urls.browse_swhid(data);
- const formattedSWHID = data.replace(/;/g, ';
');
- return `${formattedSWHID}`;
- }
+ if (type === 'display' && data && data.startsWith('swh')) {
+ const browseUrl = Urls.browse_swhid(data);
+ const formattedSWHID = data.replace(/;/g, ';
');
+ return `${formattedSWHID}`;
+ }
+ return data;
+}
+
+/**
+ * DataTables cell renderer for a plain URI value (used by the 'uri' column).
+ *
+ * When the cell is rendered for display and holds a truthy value, the value
+ * is passed through encodeURI before being interpolated into the returned
+ * template string. For any other render type, or a falsy value, the raw
+ * data is returned untouched.
+ *
+ * NOTE(review): encodeURI also escapes '%', so a value that is already
+ * percent-encoded would presumably come out double-encoded -- confirm that
+ * the backend only sends unencoded URIs.
+ *
+ * @param {?string} data - cell value
+ * @param {string} type - DataTables render type, e.g. 'display'
+ * @returns {?string} rendered value for 'display', raw data otherwise
+ */
+function genLink(data, type) {
+ if (type === 'display' && data) {
+ const sData = encodeURI(data);
+ return `${sData}`;
}
return data;
}
export function initDepositAdmin(username, isStaff) {
let depositsTable;
$(document).ready(() => {
$.fn.dataTable.ext.errMode = 'none';
depositsTable = $('#swh-admin-deposit-list')
.on('error.dt', (e, settings, techNote, message) => {
$('#swh-admin-deposit-list-error').text(message);
})
.DataTable({
serverSide: true,
processing: true,
// let's define the order of table options display
// f: (f)ilter
// l: (l)ength changing
// r: p(r)ocessing
// t: (t)able
// i: (i)nfo
// p: (p)agination
// see https://datatables.net/examples/basic_init/dom.html
dom: '<<"d-flex justify-content-between align-items-center"f' +
'<"#list-exclude">l>rt<"bottom"ip>>',
// div#list-exclude is a custom filter added next to dataTable
// initialization below through js dom manipulation, see
// https://datatables.net/examples/advanced_init/dom_toolbar.html
ajax: {
url: Urls.admin_deposit_list(),
data: d => {
d.excludePattern = $('#swh-admin-deposit-list-exclude-filter').val();
if (!isStaff) {
d.username = username;
}
}
},
columns: [
{
data: 'id',
name: 'id'
},
{
- data: 'swhid_context',
- name: 'swhid_context',
+ data: 'type',
+ name: 'type'
+ },
+ {
+ data: 'uri',
+ name: 'uri',
render: (data, type, row) => {
- if (data && type === 'display') {
- const originPattern = ';origin=';
- const originPatternIdx = data.indexOf(originPattern);
- if (originPatternIdx !== -1) {
- let originUrl = data.slice(originPatternIdx + originPattern.length);
- const nextSepPattern = ';';
- const nextSepPatternIdx = originUrl.indexOf(nextSepPattern);
- if (nextSepPatternIdx !== -1) { /* Remove extra context */
- originUrl = originUrl.slice(0, nextSepPatternIdx);
- }
- return `${originUrl}`;
- }
- }
- return data;
+ return genLink(data, type);
}
},
{
data: 'reception_date',
name: 'reception_date',
render: (data, type, row) => {
if (type === 'display') {
const date = new Date(data);
return date.toLocaleString();
}
return data;
}
},
{
data: 'status',
name: 'status'
},
{
data: 'status_detail',
name: 'status_detail',
render: (data, type, row) => {
if (type === 'display' && data) {
let text = data;
if (typeof data === 'object') {
text = JSON.stringify(data, null, 4);
}
return `
${text}
`;
}
return data;
},
orderable: false,
visible: false
},
{
data: 'swhid',
name: 'swhid',
render: (data, type, row) => {
return genSwhLink(data, type);
},
orderable: false,
visible: false
},
{
data: 'swhid_context',
name: 'swhid_context',
render: (data, type, row) => {
return genSwhLink(data, type);
},
orderable: false,
visible: false
}
],
scrollX: true,
scrollY: '50vh',
scrollCollapse: true,
order: [[0, 'desc']]
});
// Some more customization is needed on the table
$('div#list-exclude').html(`
`);
// Adding exclusion pattern update behavior, when typing, update search
$('#swh-admin-deposit-list-exclude-filter').keyup(function() {
depositsTable.draw();
});
// at last draw the table
depositsTable.draw();
});
$('a.toggle-col').on('click', function(e) {
e.preventDefault();
var column = depositsTable.column($(this).attr('data-column'));
column.visible(!column.visible());
if (column.visible()) {
$(this).removeClass('col-hidden');
} else {
$(this).addClass('col-hidden');
}
});
}
diff --git a/cypress/integration/deposit-admin.spec.js b/cypress/integration/deposit-admin.spec.js
index ae21f453..4b308e4a 100644
--- a/cypress/integration/deposit-admin.spec.js
+++ b/cypress/integration/deposit-admin.spec.js
@@ -1,156 +1,164 @@
/**
- * Copyright (C) 2020-2021 The Software Heritage developers
+ * Copyright (C) 2020-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
// data to use as request query response
let responseDeposits;
let expectedOrigins;
describe('Test admin deposit page', function() {
beforeEach(() => {
responseDeposits = [
{
'id': 614,
+ 'type': 'code',
'external_id': 'ch-de-1',
'reception_date': '2020-05-18T13:48:27Z',
'status': 'done',
'status_detail': null,
'swhid': 'swh:1:dir:ef04a768',
- 'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/'
+ 'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/',
+ 'uri': 'https://w.s.o/c-d-1'
},
{
'id': 613,
+ 'type': 'code',
'external_id': 'ch-de-2',
'reception_date': '2020-05-18T11:20:16Z',
'status': 'done',
'status_detail': null,
'swhid': 'swh:1:dir:181417fb',
- 'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/'
+ 'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/',
+ 'uri': 'https://w.s.o/c-d-2'
},
{
'id': 612,
+ 'type': 'code',
'external_id': 'ch-de-3',
'reception_date': '2020-05-18T11:20:16Z',
'status': 'rejected',
'status_detail': 'incomplete deposit!',
'swhid': null,
- 'swhid_context': null
+ 'swhid_context': null,
+ 'uri': null
}
];
// those are computed from the
expectedOrigins = {
614: 'https://w.s.o/c-d-1',
613: 'https://w.s.o/c-d-2',
612: ''
};
});
it('Should display properly entries', function() {
cy.adminLogin();
const testDeposits = responseDeposits;
cy.intercept(`${this.Urls.admin_deposit_list()}**`, {
body: {
'draw': 10,
'recordsTotal': testDeposits.length,
'recordsFiltered': testDeposits.length,
'data': testDeposits
}
}).as('listDeposits');
cy.visit(this.Urls.admin_deposit());
cy.location('pathname')
.should('be.equal', this.Urls.admin_deposit());
cy.url().should('include', '/admin/deposit');
cy.get('#swh-admin-deposit-list')
.should('exist');
cy.wait('@listDeposits').then((xhr) => {
cy.log('response:', xhr.response);
cy.log(xhr.response.body);
const deposits = xhr.response.body.data;
cy.log('Deposits: ', deposits);
expect(deposits.length).to.equal(testDeposits.length);
cy.get('#swh-admin-deposit-list').find('tbody > tr').as('rows');
// only 2 entries
cy.get('@rows').each((row, idx, collection) => {
const deposit = deposits[idx];
const responseDeposit = testDeposits[idx];
assert.isNotNull(deposit);
assert.isNotNull(responseDeposit);
expect(deposit.id).to.be.equal(responseDeposit['id']);
+ expect(deposit.uri).to.be.equal(responseDeposit['uri']);
+ expect(deposit.type).to.be.equal(responseDeposit['type']);
expect(deposit.external_id).to.be.equal(responseDeposit['external_id']);
expect(deposit.status).to.be.equal(responseDeposit['status']);
expect(deposit.status_detail).to.be.equal(responseDeposit['status_detail']);
expect(deposit.swhid).to.be.equal(responseDeposit['swhid']);
expect(deposit.swhid_context).to.be.equal(responseDeposit['swhid_context']);
const expectedOrigin = expectedOrigins[deposit.id];
// ensure it's in the dom
cy.contains(deposit.id).should('be.visible');
if (deposit.status !== 'rejected') {
expect(row).to.not.contain(deposit.external_id);
cy.contains(expectedOrigin).should('be.visible');
}
cy.contains(deposit.status).should('be.visible');
// those are hidden by default, so now visible
if (deposit.status_detail !== null) {
cy.contains(deposit.status_detail).should('not.exist');
}
// those are hidden by default
if (deposit.swhid !== null) {
cy.contains(deposit.swhid).should('not.exist');
cy.contains(deposit.swhid_context).should('not.exist');
}
});
// toggling all links and ensure, the previous checks are inverted
cy.get('a.toggle-col').click({'multiple': true}).then(() => {
cy.get('#swh-admin-deposit-list').find('tbody > tr').as('rows');
cy.get('@rows').each((row, idx, collection) => {
const deposit = deposits[idx];
const expectedOrigin = expectedOrigins[deposit.id];
// ensure it's in the dom
cy.contains(deposit.id).should('not.exist');
if (deposit.status !== 'rejected') {
expect(row).to.not.contain(deposit.external_id);
expect(row).to.contain(expectedOrigin);
}
expect(row).to.not.contain(deposit.status);
// those are hidden by default, so now visible
if (deposit.status_detail !== null) {
cy.contains(deposit.status_detail).should('be.visible');
}
// those are hidden by default, so now they should be visible
if (deposit.swhid !== null) {
cy.contains(deposit.swhid).should('be.visible');
cy.contains(deposit.swhid_context).should('be.visible');
// check SWHID link text formatting
cy.contains(deposit.swhid_context).then(elt => {
expect(elt[0].innerHTML).to.equal(deposit.swhid_context.replace(/;/g, ';
'));
});
}
});
});
cy.get('#swh-admin-deposit-list-error')
.should('not.contain',
'An error occurred while retrieving the list of deposits');
});
});
});
diff --git a/swh/web/admin/deposit.py b/swh/web/admin/deposit.py
index e6bb3cf5..ddbfe304 100644
--- a/swh/web/admin/deposit.py
+++ b/swh/web/admin/deposit.py
@@ -1,92 +1,124 @@
-# Copyright (C) 2018-2021 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import sentry_sdk
from django.conf import settings
from django.contrib.auth.decorators import user_passes_test
from django.core.paginator import Paginator
from django.http import JsonResponse
from django.shortcuts import render
from swh.web.admin.adminurls import admin_route
from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION
-from swh.web.common.utils import get_deposits_list
+from swh.web.common.utils import (
+ get_deposits_list,
+ parse_swh_deposit_origin,
+ parse_swh_metadata_provenance,
+)
def _can_list_deposits(user):
return user.is_staff or user.has_perm(ADMIN_LIST_DEPOSIT_PERMISSION)
@admin_route(r"deposit/", view_name="admin-deposit")
@user_passes_test(_can_list_deposits, login_url=settings.LOGIN_URL)
def _admin_origin_save(request):
return render(request, "admin/deposit.html")
@admin_route(r"deposit/list/", view_name="admin-deposit-list")
@user_passes_test(_can_list_deposits, login_url=settings.LOGIN_URL)
def _admin_deposit_list(request):
+    """Return one page of deposits as JSON for the admin deposit table.
+
+    The DataTables query parameters (``draw``, ``search[value]``,
+    ``order[0][column]``, ``order[0][dir]``, ``columns[N][name]``, ``start``,
+    ``length``) and the custom ``excludePattern``/``username`` parameters are
+    read from ``request.GET``.
+
+    Each returned row carries a computed ``uri``, resolved in order from: the
+    raw metadata (provenance url for ``meta`` deposits, origin url for ``code``
+    deposits), the deposit ``origin_url``, and finally the origin qualifier of
+    ``swhid_context``. It is None when none of them yields a value.
+
+    Any exception is reported to Sentry and surfaced to the client through an
+    ``error`` entry of the JSON payload.
+    """
table_data = {}
table_data["draw"] = int(request.GET["draw"])
try:
deposits = get_deposits_list(request.GET.get("username"))
deposits_count = len(deposits)
search_value = request.GET["search[value]"]
if search_value:
deposits = [
d
for d in deposits
if any(
search_value.lower() in val
for val in [str(v).lower() for v in d.values()]
)
]
exclude_pattern = request.GET.get("excludePattern")
if exclude_pattern:
deposits = [
d
for d in deposits
if all(
exclude_pattern.lower() not in val
for val in [str(v).lower() for v in d.values()]
)
]
column_order = request.GET["order[0][column]"]
field_order = request.GET["columns[%s][name]" % column_order]
order_dir = request.GET["order[0][dir]"]
-        deposits = sorted(deposits, key=lambda d: d[field_order] or "")
+        # ``uri`` is only computed further down and is not a key of the raw
+        # deposit dicts: use .get() so that ordering on that column does not
+        # raise KeyError (and turn the whole listing into an error).
+        deposits = sorted(deposits, key=lambda d: d.get(field_order) or "")
if order_dir == "desc":
deposits = list(reversed(deposits))
length = int(request.GET["length"])
page = int(request.GET["start"]) / length + 1
paginator = Paginator(deposits, length)
data = paginator.page(page).object_list
table_data["recordsTotal"] = deposits_count
table_data["recordsFiltered"] = len(deposits)
- table_data["data"] = [
- {
+        # Only the swhid fallback below needs it; import once, not per row.
+        from swh.model.swhids import QualifiedSWHID
+
+        data_list = []
+        for d in data:
+            # raw_metadata is deliberately not copied into the row: it could
+            # be large and is not displayed.
+            data_dict = {
"id": d["id"],
+                "type": d["type"],
"external_id": d["external_id"],
"reception_date": d["reception_date"],
"status": d["status"],
"status_detail": d["status_detail"],
"swhid": d["swhid"],
"swhid_context": d["swhid_context"],
}
- for d in data
- ]
+            provenance = None
+            # Optional inputs of the fallback chain: tolerate their absence.
+            raw_metadata = d.get("raw_metadata")
+            # Try to determine provenance out of the raw metadata
+            if raw_metadata and d["type"] == "meta":  # metadata provenance
+                provenance = parse_swh_metadata_provenance(raw_metadata)
+            elif raw_metadata and d["type"] == "code":
+                provenance = parse_swh_deposit_origin(raw_metadata)
+
+            if not provenance and d.get("origin_url"):
+                provenance = d["origin_url"]
+
+            # Finally, if still not found, we determine uri using the swhid
+            if not provenance and d["swhid_context"]:
+                # Trying to compute the origin as we did before in the js
+                swhid = QualifiedSWHID.from_string(d["swhid_context"])
+                provenance = swhid.origin
+
+            data_dict["uri"] = provenance  # could be None
+
+            data_list.append(data_dict)
+
+        table_data["data"] = data_list
except Exception as exc:
sentry_sdk.capture_exception(exc)
- table_data["error"] = (
- "An error occurred while retrieving " "the list of deposits !"
- )
+        table_data[
+            "error"
+        ] = "An error occurred while retrieving the list of deposits !"
return JsonResponse(table_data)
diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py
index 0ffc772d..28b28c33 100644
--- a/swh/web/common/utils.py
+++ b/swh/web/common/utils.py
@@ -1,449 +1,516 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timezone
import os
import re
from typing import Any, Dict, List, Optional
import urllib.parse
+from xml.etree import ElementTree
from bs4 import BeautifulSoup
from docutils.core import publish_parts
import docutils.parsers.rst
import docutils.utils
from docutils.writers.html5_polyglot import HTMLTranslator, Writer
from iso8601 import ParseError, parse_date
from pkg_resources import get_distribution
from prometheus_client.registry import CollectorRegistry
import requests
from requests.auth import HTTPBasicAuth
from django.core.cache import cache
from django.http import HttpRequest, QueryDict
from django.shortcuts import redirect
from django.urls import resolve
from django.urls import reverse as django_reverse
from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION
from swh.web.common.exc import BadInputExc
from swh.web.common.typing import QueryParameters
from swh.web.config import SWH_WEB_SERVER_NAME, get_config, search
SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True)
swh_object_icons = {
"alias": "mdi mdi-star",
"branch": "mdi mdi-source-branch",
"branches": "mdi mdi-source-branch",
"content": "mdi mdi-file-document",
"cnt": "mdi mdi-file-document",
"directory": "mdi mdi-folder",
"dir": "mdi mdi-folder",
"origin": "mdi mdi-source-repository",
"ori": "mdi mdi-source-repository",
"person": "mdi mdi-account",
"revisions history": "mdi mdi-history",
"release": "mdi mdi-tag",
"rel": "mdi mdi-tag",
"releases": "mdi mdi-tag",
"revision": "mdi mdi-rotate-90 mdi-source-commit",
"rev": "mdi mdi-rotate-90 mdi-source-commit",
"snapshot": "mdi mdi-camera",
"snp": "mdi mdi-camera",
"visits": "mdi mdi-calendar-month",
}
def reverse(
viewname: str,
url_args: Optional[Dict[str, Any]] = None,
query_params: Optional[QueryParameters] = None,
current_app: Optional[str] = None,
urlconf: Optional[str] = None,
request: Optional[HttpRequest] = None,
) -> str:
"""An override of django reverse function supporting query parameters.
Args:
viewname: the name of the django view from which to compute a url
url_args: dictionary of url arguments indexed by their names
query_params: dictionary of query parameters to append to the
reversed url
current_app: the name of the django app tighten to the view
urlconf: url configuration module
request: build an absolute URI if provided
Returns:
str: the url of the requested view with processed arguments and
query parameters
"""
if url_args:
url_args = {k: v for k, v in url_args.items() if v is not None}
url = django_reverse(
viewname, urlconf=urlconf, kwargs=url_args, current_app=current_app
)
if query_params:
query_params = {k: v for k, v in query_params.items() if v is not None}
if query_params and len(query_params) > 0:
query_dict = QueryDict("", mutable=True)
for k in sorted(query_params.keys()):
query_dict[k] = query_params[k]
url += "?" + query_dict.urlencode(safe="/;:")
if request is not None:
url = request.build_absolute_uri(url)
return url
def datetime_to_utc(date):
"""Returns datetime in UTC without timezone info
Args:
date (datetime.datetime): input datetime with timezone info
Returns:
datetime.datetime: datetime in UTC without timezone info
"""
if date.tzinfo and date.tzinfo != timezone.utc:
return date.astimezone(tz=timezone.utc)
else:
return date
def parse_iso8601_date_to_utc(iso_date: str) -> datetime:
"""Given an ISO 8601 datetime string, parse the result as UTC datetime.
Returns:
a timezone-aware datetime representing the parsed date
Raises:
swh.web.common.exc.BadInputExc: provided date does not respect ISO 8601 format
Samples:
- 2016-01-12
- 2016-01-12T09:19:12+0100
- 2007-01-14T20:34:22Z
"""
try:
date = parse_date(iso_date)
return datetime_to_utc(date)
except ParseError as e:
raise BadInputExc(e)
def shorten_path(path):
"""Shorten the given path: for each hash present, only return the first
8 characters followed by an ellipsis"""
sha256_re = r"([0-9a-f]{8})[0-9a-z]{56}"
sha1_re = r"([0-9a-f]{8})[0-9a-f]{32}"
ret = re.sub(sha256_re, r"\1...", path)
return re.sub(sha1_re, r"\1...", ret)
def format_utc_iso_date(iso_date, fmt="%d %B %Y, %H:%M UTC"):
"""Turns a string representation of an ISO 8601 datetime string
to UTC and format it into a more human readable one.
For instance, from the following input
string: '2017-05-04T13:27:13+02:00' the following one
is returned: '04 May 2017, 11:27 UTC'.
Custom format string may also be provided
as parameter
Args:
iso_date (str): a string representation of an ISO 8601 date
fmt (str): optional date formatting string
Returns:
str: a formatted string representation of the input iso date
"""
if not iso_date:
return iso_date
date = parse_iso8601_date_to_utc(iso_date)
return date.strftime(fmt)
def gen_path_info(path):
"""Function to generate path data navigation for use
with a breadcrumb in the swh web ui.
For instance, from a path /folder1/folder2/folder3,
it returns the following list::
[{'name': 'folder1', 'path': 'folder1'},
{'name': 'folder2', 'path': 'folder1/folder2'},
{'name': 'folder3', 'path': 'folder1/folder2/folder3'}]
Args:
path: a filesystem path
Returns:
list: a list of path data for navigation as illustrated above.
"""
path_info = []
if path:
sub_paths = path.strip("/").split("/")
path_from_root = ""
for p in sub_paths:
path_from_root += "/" + p
path_info.append({"name": p, "path": path_from_root.strip("/")})
return path_info
def parse_rst(text, report_level=2):
"""
Parse a reStructuredText string with docutils.
Args:
text (str): string with reStructuredText markups in it
report_level (int): level of docutils report messages to print
(1 info 2 warning 3 error 4 severe 5 none)
Returns:
docutils.nodes.document: a parsed docutils document
"""
parser = docutils.parsers.rst.Parser()
components = (docutils.parsers.rst.Parser,)
settings = docutils.frontend.OptionParser(
components=components
).get_default_values()
settings.report_level = report_level
document = docutils.utils.new_document("rst-doc", settings=settings)
parser.parse(text, document)
return document
def get_client_ip(request):
"""
Return the client IP address from an incoming HTTP request.
Args:
request (django.http.HttpRequest): the incoming HTTP request
Returns:
str: The client IP address
"""
x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR")
if x_forwarded_for:
ip = x_forwarded_for.split(",")[0]
else:
ip = request.META.get("REMOTE_ADDR")
return ip
def is_swh_web_development(request: HttpRequest) -> bool:
"""Indicate if we are running a development version of swh-web.
"""
site_base_url = request.build_absolute_uri("/")
return any(
host in site_base_url for host in ("localhost", "127.0.0.1", "testserver")
)
def is_swh_web_staging(request: HttpRequest) -> bool:
"""Indicate if we are running a staging version of swh-web.
"""
config = get_config()
site_base_url = request.build_absolute_uri("/")
return any(
server_name in site_base_url for server_name in config["staging_server_names"]
)
def is_swh_web_production(request: HttpRequest) -> bool:
"""Indicate if we are running the public production version of swh-web.
"""
return SWH_WEB_SERVER_NAME in request.build_absolute_uri("/")
browsers_supported_image_mimes = set(
[
"image/gif",
"image/png",
"image/jpeg",
"image/bmp",
"image/webp",
"image/svg",
"image/svg+xml",
]
)
def context_processor(request):
"""
Django context processor used to inject variables
in all swh-web templates.
"""
config = get_config()
if (
hasattr(request, "user")
and request.user.is_authenticated
and not hasattr(request.user, "backend")
):
# To avoid django.template.base.VariableDoesNotExist errors
# when rendering templates when standard Django user is logged in.
request.user.backend = "django.contrib.auth.backends.ModelBackend"
return {
"swh_object_icons": swh_object_icons,
"available_languages": None,
"swh_client_config": config["client_config"],
"oidc_enabled": bool(config["keycloak"]["server_url"]),
"browsers_supported_image_mimes": browsers_supported_image_mimes,
"keycloak": config["keycloak"],
"site_base_url": request.build_absolute_uri("/"),
"DJANGO_SETTINGS_MODULE": os.environ["DJANGO_SETTINGS_MODULE"],
"status": config["status"],
"swh_web_dev": is_swh_web_development(request),
"swh_web_staging": is_swh_web_staging(request),
"swh_web_version": get_distribution("swh.web").version,
"iframe_mode": False,
"ADMIN_LIST_DEPOSIT_PERMISSION": ADMIN_LIST_DEPOSIT_PERMISSION,
}
def resolve_branch_alias(
snapshot: Dict[str, Any], branch: Optional[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
"""
Resolve branch alias in snapshot content.
Args:
snapshot: a full snapshot content
branch: a branch alias contained in the snapshot
Returns:
The real snapshot branch that got aliased.
"""
while branch and branch["target_type"] == "alias":
if branch["target"] in snapshot["branches"]:
branch = snapshot["branches"][branch["target"]]
else:
from swh.web.common import archive
snp = archive.lookup_snapshot(
snapshot["id"], branches_from=branch["target"], branches_count=1
)
if snp and branch["target"] in snp["branches"]:
branch = snp["branches"][branch["target"]]
else:
branch = None
return branch
class _NoHeaderHTMLTranslator(HTMLTranslator):
"""
Docutils translator subclass to customize the generation of HTML
from reST-formatted docstrings
"""
def __init__(self, document):
super().__init__(document)
self.body_prefix = []
self.body_suffix = []
_HTML_WRITER = Writer()
_HTML_WRITER.translator_class = _NoHeaderHTMLTranslator
def rst_to_html(rst: str) -> str:
"""
Convert reStructuredText document into HTML.
Args:
rst: A string containing a reStructuredText document
Returns:
Body content of the produced HTML conversion.
"""
settings = {
"initial_header_level": 2,
"halt_level": 4,
"traceback": True,
}
pp = publish_parts(rst, writer=_HTML_WRITER, settings_overrides=settings)
return f'{pp["html_body"]}
'
def prettify_html(html: str) -> str:
"""
Prettify an HTML document.
Args:
html: Input HTML document
Returns:
The prettified HTML document
"""
return BeautifulSoup(html, "lxml").prettify()
def _deposits_list_url(
deposits_list_base_url: str, page_size: int, username: Optional[str]
) -> str:
params = {"page_size": str(page_size)}
if username is not None:
params["username"] = username
return f"{deposits_list_base_url}?{urllib.parse.urlencode(params)}"
def get_deposits_list(username: Optional[str] = None) -> List[Dict[str, Any]]:
"""Return the list of software deposits using swh-deposit API
"""
config = get_config()["deposit"]
deposits_list_base_url = config["private_api_url"] + "deposits"
deposits_list_auth = HTTPBasicAuth(
config["private_api_user"], config["private_api_password"]
)
deposits_list_url = _deposits_list_url(
deposits_list_base_url, page_size=1, username=username
)
nb_deposits = requests.get(
deposits_list_url, auth=deposits_list_auth, timeout=30
).json()["count"]
deposits_data = cache.get(f"swh-deposit-list-{username}")
if not deposits_data or deposits_data["count"] != nb_deposits:
deposits_list_url = _deposits_list_url(
deposits_list_base_url, page_size=nb_deposits, username=username
)
deposits_data = requests.get(
deposits_list_url, auth=deposits_list_auth, timeout=30,
).json()
cache.set(f"swh-deposit-list-{username}", deposits_data)
return deposits_data["results"]
def origin_visit_types() -> List[str]:
"""Return the exhaustive list of visit types for origins
ingested into the archive.
"""
try:
return sorted(search().visit_types_count().keys())
except Exception:
return []
def redirect_to_new_route(request, new_route, permanent=True):
"""Redirect a request to another route with url args and query parameters
eg: /origin//log?path=test can be redirected as
/log?url=&path=test. This can be used to deprecate routes
"""
request_path = resolve(request.path_info)
args = {**request_path.kwargs, **request.GET.dict()}
return redirect(reverse(new_route, query_params=args), permanent=permanent,)
+
+
+# Prefix -> namespace URI mapping passed as ``namespaces=`` to the ElementTree
+# lookups in parse_swh_metadata_provenance and parse_swh_deposit_origin.
+NAMESPACES = {
+ "swh": "https://www.softwareheritage.org/schema/2018/deposit",
+ "schema": "http://schema.org/",
+}
+
+
+def parse_swh_metadata_provenance(raw_metadata: str) -> Optional[str]:
+    """Parse swh metadata-provenance out of the raw metadata deposit. If found,
+    returns the value, None otherwise.
+
+    The url is looked up under the document root at
+    ``swh:deposit/swh:metadata-provenance/schema:url``:
+
+    .. code-block:: xml
+
+        <swh:deposit>
+          <swh:metadata-provenance>
+            <schema:url>https://example.org/metadata/url</schema:url>
+          </swh:metadata-provenance>
+        </swh:deposit>
+
+    Args:
+        raw_metadata: raw metadata out of deposits received
+
+    Returns:
+        Either the metadata provenance url if any, or None otherwise (element
+        absent, empty text, or ``raw_metadata`` not being well-formed XML)
+
+    """
+    # NOTE(review): raw_metadata presumably comes from deposit clients and the
+    # stdlib XML parsers are documented as not hardened against maliciously
+    # constructed data -- confirm whether a hardened parser is needed here.
+    try:
+        metadata = ElementTree.fromstring(raw_metadata)
+    except ElementTree.ParseError:
+        # Malformed XML is treated like "no provenance found" rather than
+        # propagated, in line with the documented "None otherwise".
+        return None
+    url = metadata.findtext(
+        "swh:deposit/swh:metadata-provenance/schema:url", namespaces=NAMESPACES,
+    )
+    # findtext yields "" for an empty element: normalize it to None
+    return url or None
+
+
+def parse_swh_deposit_origin(raw_metadata: str) -> Optional[str]:
+    """Parses ``<swh:create_origin>`` and ``<swh:add_to_origin>`` from metadata
+    document, if any. They are mutually exclusive and tested as such in the
+    deposit.
+
+    .. code-block:: xml
+
+        <swh:deposit>
+          <swh:create_origin>
+            <swh:origin url="https://example.org/repo/software123/"/>
+          </swh:create_origin>
+        </swh:deposit>
+
+    .. code-block:: xml
+
+        <swh:deposit>
+          <swh:add_to_origin>
+            <swh:origin url="https://example.org/repo/software123/"/>
+          </swh:add_to_origin>
+        </swh:deposit>
+
+    Args:
+        raw_metadata: raw metadata out of deposits received
+
+    Returns:
+        The ``url`` attribute of the first matching ``swh:origin`` element
+        (``create_origin`` is looked up before ``add_to_origin``), None when
+        neither is present or ``raw_metadata`` is not well-formed XML
+
+    """
+    # NOTE(review): raw_metadata presumably comes from deposit clients and the
+    # stdlib XML parsers are documented as not hardened against maliciously
+    # constructed data -- confirm whether a hardened parser is needed here.
+    try:
+        metadata = ElementTree.fromstring(raw_metadata)
+    except ElementTree.ParseError:
+        # Malformed XML is treated like "no origin declared" rather than
+        # propagated, in line with the documented "None otherwise".
+        return None
+    for origin_tag in ("create_origin", "add_to_origin"):
+        # The [@url] predicate only matches elements carrying the attribute,
+        # so the attrib lookup below cannot raise KeyError.
+        elt = metadata.find(
+            f"swh:deposit/swh:{origin_tag}/swh:origin[@url]", namespaces=NAMESPACES
+        )
+        if elt is not None:
+            return elt.attrib["url"]
+    return None
diff --git a/swh/web/templates/admin/deposit.html b/swh/web/templates/admin/deposit.html
index ba4e8b23..ac64670b 100644
--- a/swh/web/templates/admin/deposit.html
+++ b/swh/web/templates/admin/deposit.html
@@ -1,59 +1,61 @@
{% extends "layout.html" %}
{% comment %}
Copyright (C) 2018-2021 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% load swh_templatetags %}
{% load render_bundle from webpack_loader %}
{% block header %}
{{ block.super }}
{% render_bundle 'admin' %}
{% endblock %}
{% block title %} Deposit administration {% endblock %}
{% block navbar-content %}
Deposit administration
{% endblock %}
{% block content %}
The table below displays the list of software artifacts deposited to
Software Heritage.
id |
- origin |
+ type |
+ uri |
reception date |
status |
status detail |
directory |
directory with context |
{% endblock content %}
diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py
index 31593a48..4d39a2ba 100644
--- a/swh/web/tests/common/test_utils.py
+++ b/swh/web/tests/common/test_utils.py
@@ -1,316 +1,357 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
from base64 import b64encode
import datetime
+from os.path import join
from urllib.parse import quote
import pytest
from django.conf.urls import url
from django.test.utils import override_settings
from django.urls.exceptions import NoReverseMatch
from swh.web.common import utils
from swh.web.common.exc import BadInputExc
from swh.web.config import SWH_WEB_SERVER_NAME, SWH_WEB_STAGING_SERVER_NAMES, get_config
def test_shorten_path_noop():
noops = ["/api/", "/browse/", "/content/symbol/foobar/"]
for noop in noops:
assert utils.shorten_path(noop) == noop
def test_shorten_path_sha1():
sha1 = "aafb16d69fd30ff58afdd69036a26047f3aebdc6"
short_sha1 = sha1[:8] + "..."
templates = [
"/api/1/content/sha1:%s/",
"/api/1/content/sha1_git:%s/",
"/api/1/directory/%s/",
"/api/1/content/sha1:%s/ctags/",
]
for template in templates:
assert utils.shorten_path(template % sha1) == template % short_sha1
def test_shorten_path_sha256():
sha256 = "aafb16d69fd30ff58afdd69036a26047" "213add102934013a014dfca031c41aef"
short_sha256 = sha256[:8] + "..."
templates = [
"/api/1/content/sha256:%s/",
"/api/1/directory/%s/",
"/api/1/content/sha256:%s/filetype/",
]
for template in templates:
assert utils.shorten_path(template % sha256) == template % short_sha256
@pytest.mark.parametrize(
"input_timestamp, output_date",
[
(
"2016-01-12",
datetime.datetime(2016, 1, 12, 0, 0, tzinfo=datetime.timezone.utc),
),
(
"2016-01-12T09:19:12+0100",
datetime.datetime(2016, 1, 12, 8, 19, 12, tzinfo=datetime.timezone.utc),
),
(
"2007-01-14T20:34:22Z",
datetime.datetime(2007, 1, 14, 20, 34, 22, tzinfo=datetime.timezone.utc),
),
],
)
def test_parse_iso8601_date_to_utc_ok(input_timestamp, output_date):
assert utils.parse_iso8601_date_to_utc(input_timestamp) == output_date
@pytest.mark.parametrize(
"invalid_iso8601_timestamp", ["Today is January 1, 2047 at 8:21:00AM", "1452591542"]
)
def test_parse_iso8601_date_to_utc_ko(invalid_iso8601_timestamp):
with pytest.raises(BadInputExc):
utils.parse_iso8601_date_to_utc(invalid_iso8601_timestamp)
def test_format_utc_iso_date():
assert (
utils.format_utc_iso_date("2017-05-04T13:27:13+02:00")
== "04 May 2017, 11:27 UTC"
)
def test_gen_path_info():
input_path = "/home/user/swh-environment/swh-web/"
expected_result = [
{"name": "home", "path": "home"},
{"name": "user", "path": "home/user"},
{"name": "swh-environment", "path": "home/user/swh-environment"},
{"name": "swh-web", "path": "home/user/swh-environment/swh-web"},
]
path_info = utils.gen_path_info(input_path)
assert path_info == expected_result
input_path = "home/user/swh-environment/swh-web"
path_info = utils.gen_path_info(input_path)
assert path_info == expected_result
def test_rst_to_html():
rst = (
"Section\n"
"=======\n\n"
"**Some strong text**\n\n"
"* This is a bulleted list.\n"
"* It has two items, the second\n"
" item uses two lines.\n"
"\n"
"1. This is a numbered list.\n"
"2. It has two items too.\n"
"\n"
"#. This is a numbered list.\n"
"#. It has two items too.\n"
)
expected_html = (
'Section
\n'
"
Some strong text
\n"
'
\n"
'
\n'
"This is a numbered list.
\n"
"It has two items too.
\n"
"This is a numbered list.
\n"
"It has two items too.
\n"
"
\n"
"
"
)
assert utils.rst_to_html(rst) == expected_html
def sample_test_view(request, string, number):
    """Placeholder view taking two URL arguments; it deliberately does nothing."""
    return None
def sample_test_view_no_url_args(request):
    """Placeholder view taking no URL arguments; it deliberately does nothing."""
    return None
# URL configuration for the two sample views; the reverse() tests in this
# module select it through @override_settings(ROOT_URLCONF=__name__).
urlpatterns = [
    url(
        # The named groups must match the keyword arguments of
        # sample_test_view (string, number); "number" only accepts digits.
        r"^sample/test/(?P<string>.+)/view/(?P<number>[0-9]+)/$",
        sample_test_view,
        name="sample-test-view",
    ),
    url(
        r"^sample/test/view/no/url/args/$",
        sample_test_view_no_url_args,
        name="sample-test-view-no-url-args",
    ),
]
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_url_args_only_ok():
    """reverse() must substitute valid URL arguments into the sample route."""
    string, number = "foo", 55
    url_args = {"string": string, "number": number}
    reversed_url = utils.reverse("sample-test-view", url_args=url_args)
    assert reversed_url == f"/sample/test/{string}/view/{number}/"
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_url_args_only_ko():
    """reverse() must raise NoReverseMatch when 'number' is not made of digits."""
    string = "foo"
    # "number" reuses the non-numeric string, so the [0-9]+ group cannot match.
    invalid_url_args = {"string": string, "number": string}
    with pytest.raises(NoReverseMatch):
        utils.reverse("sample-test-view", url_args=invalid_url_args)
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_no_url_args():
    """reverse() with only a view name must return the bare route path."""
    expected_url = "/sample/test/view/no/url/args/"
    assert utils.reverse("sample-test-view-no-url-args") == expected_url
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_query_params_only():
    """Query parameters are appended to the URL; a None value is left out."""
    view_name = "sample-test-view-no-url-args"
    start, scope = 0, "foo"

    url_with_scope = utils.reverse(
        view_name, query_params={"start": start, "scope": scope}
    )
    assert (
        url_with_scope
        == f"/sample/test/view/no/url/args/?scope={scope}&start={start}"
    )

    # A parameter whose value is None must not appear in the query string.
    url_without_scope = utils.reverse(
        view_name, query_params={"start": start, "scope": None}
    )
    assert url_without_scope == f"/sample/test/view/no/url/args/?start={start}"
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_query_params_encode():
    """Query parameter values must be percent-encoded in the reversed URL."""
    libname = "libstc++"
    encoded_libname = quote(libname, safe="/;:")
    reversed_url = utils.reverse(
        "sample-test-view-no-url-args", query_params={"libname": libname}
    )
    assert reversed_url == f"/sample/test/view/no/url/args/?libname={encoded_libname}"
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_url_args_query_params():
    """URL arguments and query parameters must both appear in the reversed URL."""
    string, number = "foo", 55
    start, scope = 10, "bar"
    url_args = {"string": string, "number": number}
    query_params = {"start": start, "scope": scope}

    reversed_url = utils.reverse(
        "sample-test-view", url_args=url_args, query_params=query_params
    )

    expected_url = f"/sample/test/{string}/view/{number}/?scope={scope}&start={start}"
    assert reversed_url == expected_url
@override_settings(ROOT_URLCONF=__name__)
def test_reverse_absolute_uri(request_factory):
    """Passing a request to reverse() must yield an absolute http:// URL."""
    view_name = "sample-test-view-no-url-args"
    request = request_factory.get(utils.reverse(view_name))
    absolute_url = utils.reverse(view_name, request=request)
    server_name = request.META["SERVER_NAME"]
    assert absolute_url == f"http://{server_name}/sample/test/view/no/url/args/"
def test_get_deposits_list(requests_mock):
    """get_deposits_list must return the 'results' list of the mocked response."""
    deposits = [
        {
            "check_task_id": "351820217",
            "client": 2,
            "collection": 1,
            "complete_date": "2021-01-21T07:52:19.919312Z",
            "external_id": "hal-03116143",
            "id": 1412,
            "load_task_id": "351820260",
            "origin_url": "https://hal.archives-ouvertes.fr/hal-03116143",
            "parent": None,
            "reception_date": "2021-01-21T07:52:19.471019Z",
            "status": "done",
            "status_detail": None,
            "swhid": "swh:1:dir:f25157ad1b13cb20ac3457d4f6756b49ac63d079",
        },
        {
            "check_task_id": "381576507",
            "client": 2,
            "collection": 1,
            "complete_date": "2021-07-07T08:00:44.726676Z",
            "external_id": "hal-03275052",
            "id": 1693,
            "load_task_id": "381576508",
            "origin_url": "https://hal.archives-ouvertes.fr/hal-03275052",
            "parent": None,
            "reception_date": "2021-07-07T08:00:44.327661Z",
            "status": "done",
            "status_detail": None,
            "swhid": "swh:1:dir:825fa96d1810177ec08a772ffa5bd34bbd08b89c",
        },
    ]
    deposits_data = {"count": 2, "results": deposits}

    deposit_config = get_config()["deposit"]
    # The mock only answers requests carrying this exact basic-auth header.
    credentials = (
        deposit_config["private_api_user"]
        + ":"
        + deposit_config["private_api_password"]
    )
    auth_token = b64encode(credentials.encode()).decode("ascii")

    requests_mock.get(
        deposit_config["private_api_url"] + "deposits",
        json=deposits_data,
        request_headers={"Authorization": f"Basic {auth_token}"},
    )

    assert utils.get_deposits_list() == deposits
@pytest.mark.parametrize("backend", ["swh-search", "swh-storage"])
def test_origin_visit_types(mocker, backend):
    """Visit types are listed with swh-search and empty when search is disabled."""
    if backend == "swh-search":
        # see swh/web/tests/data.py for origins added for tests
        assert utils.origin_visit_types() == ["git", "tar"]
        return

    # equivalent to not configuring search in the config
    search_mock = mocker.patch("swh.web.common.utils.search")
    search_mock.return_value = None
    assert utils.origin_visit_types() == []
@pytest.mark.parametrize("server_name", ["localhost", "127.0.0.1", "testserver"])
def test_is_swh_web_development(request_factory, server_name):
    """Requests for each listed server name must be detected as development."""
    dev_request = request_factory.get("/", SERVER_NAME=server_name)
    detected = utils.is_swh_web_development(dev_request)
    assert detected
@pytest.mark.parametrize("server_name", SWH_WEB_STAGING_SERVER_NAMES)
def test_is_swh_web_staging(request_factory, server_name):
    """Requests for each staging server name must be detected as staging."""
    staging_request = request_factory.get("/", SERVER_NAME=server_name)
    detected = utils.is_swh_web_staging(staging_request)
    assert detected
def test_is_swh_web_production(request_factory):
    """A request for SWH_WEB_SERVER_NAME must be detected as production."""
    production_request = request_factory.get("/", SERVER_NAME=SWH_WEB_SERVER_NAME)
    detected = utils.is_swh_web_production(production_request)
    assert detected
+
+
+@pytest.mark.parametrize(
+ "raw_metadata_file,expected_url",
+ [
+ ("raw-metadata-provenance.xml", "https://example.org/metadata/provenance"),
+ ("raw-metadata-no-swh.xml", None),
+ ],
+)
+def test_parse_swh_provenance(datadir, raw_metadata_file, expected_url):
+ metadata_path = join(datadir, "deposit", raw_metadata_file)
+ with open(metadata_path, "r") as f:
+ raw_metadata = f.read()
+
+ actual_url = utils.parse_swh_metadata_provenance(raw_metadata)
+
+ assert actual_url == expected_url
+
+
+@pytest.mark.parametrize(
+ "raw_metadata_file,expected_url",
+ [
+ (
+ "raw-metadata-create-origin.xml",
+ "https://example.org/metadata/create-origin",
+ ),
+ (
+ "raw-metadata-add-to-origin.xml",
+ "https://example.org/metadata/add-to-origin",
+ ),
+ ("raw-metadata-no-swh.xml", None),
+ ],
+)
+def test_parse_swh_origins(datadir, raw_metadata_file, expected_url):
+ metadata_path = join(datadir, "deposit", raw_metadata_file)
+ with open(metadata_path, "r") as f:
+ raw_metadata = f.read()
+
+ actual_url = utils.parse_swh_deposit_origin(raw_metadata)
+
+ assert actual_url == expected_url
diff --git a/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml b/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml
new file mode 100644
index 00000000..5174af34
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-add-to-origin.xml
@@ -0,0 +1,13 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ dudess
+
+
+
+
+
+
diff --git a/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml b/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml
new file mode 100644
index 00000000..fca04f42
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-create-origin.xml
@@ -0,0 +1,13 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a
+ dudess
+
+
+
+
+
+
diff --git a/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml b/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml
new file mode 100644
index 00000000..14675e8a
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-no-swh.xml
@@ -0,0 +1,7 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a
+ dudess
+
diff --git a/swh/web/tests/resources/deposit/raw-metadata-provenance.xml b/swh/web/tests/resources/deposit/raw-metadata-provenance.xml
new file mode 100644
index 00000000..12d958a5
--- /dev/null
+++ b/swh/web/tests/resources/deposit/raw-metadata-provenance.xml
@@ -0,0 +1,14 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a
+ dudess
+
+
+ https://example.org/metadata/provenance
+
+
+