diff --git a/cypress/integration/persistent-identifiers.spec.js b/cypress/integration/persistent-identifiers.spec.js --- a/cypress/integration/persistent-identifiers.spec.js +++ b/cypress/integration/persistent-identifiers.spec.js @@ -261,7 +261,7 @@ const swhIdsContext = win.swh.webapp.getSwhIdsContext(); for (let testData of testsData) { assert.isTrue(swhIdsContext.hasOwnProperty(testData.objectType)); - assert.equal(swhIdsContext[testData.objectType].swh_id, + assert.equal(swhIdsContext[testData.objectType].swhid, testData.objectPids.slice(-1)[0]); } }); diff --git a/swh/web/assets/src/bundles/webapp/webapp-utils.js b/swh/web/assets/src/bundles/webapp/webapp-utils.js --- a/swh/web/assets/src/bundles/webapp/webapp-utils.js +++ b/swh/web/assets/src/bundles/webapp/webapp-utils.js @@ -1,5 +1,5 @@ /** - * Copyright (C) 2018-2019 The Software Heritage developers + * Copyright (C) 2018-2020 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information @@ -296,8 +296,11 @@ // SWHID metadata contain the following keys: // * object_type: type of archived object // * object_id: sha1 object identifier -// * swh_id: SWH persistent identifier without contextual info -// * swh_id_url: URL to resolve SWH persistent identifier without contextual info +// * swhid: SWH persistent identifier without contextual info +// * swhid_url: URL to resolve SWH persistent identifier without contextual info +// * context: object describing SWHID context +// * swhid_with_context: SWH persistent identifier with contextual info +// * swhid_with_context_url: URL to resolve SWH persistent identifier with contextual info let swhidsContext_ = {}; export function setSwhIdsContext(swhidsContext) { diff --git a/swh/web/browse/snapshot_context.py b/swh/web/browse/snapshot_context.py --- a/swh/web/browse/snapshot_context.py +++ b/swh/web/browse/snapshot_context.py @@ -21,6 +21,9 @@ snapshot_identifier, CONTENT, DIRECTORY, + REVISION, + RELEASE, + SNAPSHOT, ) from swh.web.browse.utils import ( @@ -49,6 +52,7 @@ SnapshotContext, ContentMetadata, DirectoryMetadata, + SWHObjectInfo, ) from swh.web.common.utils import ( reverse, @@ -751,9 +755,9 @@ revision_found = False swh_objects = [ - {"type": "directory", "id": sha1_git}, - {"type": "revision", "id": revision_id}, - {"type": "snapshot", "id": snapshot_id}, + SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git), + SWHObjectInfo(object_type=REVISION, object_id=revision_id), + SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), ] visit_date = None @@ -765,16 +769,18 @@ release_id = snapshot_context["release_id"] browse_rel_link = None if release_id: - swh_objects.append({"type": "release", "id": release_id}) + swh_objects.append(SWHObjectInfo(object_type=RELEASE, object_id=release_id)) browse_rel_link = gen_release_link(release_id) dir_metadata = DirectoryMetadata( object_type=DIRECTORY, + object_id=sha1_git, directory=sha1_git, directory_url=browse_dir_link, nb_files=nb_files, nb_dirs=nb_dirs, sum_file_sizes=sum_file_sizes, + root_directory=root_directory, path=dir_path, revision=revision_id, revision_found=revision_found, @@ -795,7 +801,7 @@ "revision_id": revision_id, } - swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) + swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context, dir_metadata) dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/" context_found = "snapshot: %s" % snapshot_context["snapshot_id"] @@ -931,10 +937,10 @@ content_checksums = content_data.get("checksums", {}) swh_objects = [ - {"type": "content", "id": content_checksums.get("sha1_git")}, - {"type": "directory", "id": directory_id}, - {"type": "revision", "id": revision_id}, - {"type": "snapshot", "id": snapshot_id}, + SWHObjectInfo(object_type=CONTENT, object_id=content_checksums.get("sha1_git")), + SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id), + SWHObjectInfo(object_type=REVISION, object_id=revision_id), + SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), ] visit_date = None @@ -946,11 +952,12 @@ release_id = snapshot_context["release_id"] browse_rel_link = None if release_id: - swh_objects.append({"type": "release", "id": release_id}) + swh_objects.append(SWHObjectInfo(object_type=RELEASE, object_id=release_id)) browse_rel_link = gen_release_link(release_id) content_metadata = ContentMetadata( object_type=CONTENT, + object_id=content_checksums.get("sha1_git"), sha1=content_checksums.get("sha1"), sha1_git=content_checksums.get("sha1_git"), sha256=content_checksums.get("sha256"), @@ -961,6 +968,7 @@ size=filesizeformat(content_data.get("length", 0)), language=content_data.get("language"), licenses=content_data.get("licenses"), + root_directory=root_directory, path=f"/{filepath}", filename=filename, directory=directory_id, @@ -976,7 +984,7 @@ visit_type=visit_type, ) - swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) + swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context, content_metadata) content_path = "/".join([bc["name"] for bc in breadcrumbs]) context_found = "snapshot: %s" % snapshot_context["snapshot_id"] @@ -1132,13 +1140,13 @@ revision_metadata["origin visit type"] = visit_info["type"] swh_objects = [ - {"type": "revision", "id": revision_id}, - {"type": "snapshot", "id": snapshot_id}, + SWHObjectInfo(object_type=REVISION, object_id=revision_id), + SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), ] release_id = snapshot_context["release_id"] if release_id: - swh_objects.append({"type": "release", "id": release_id}) + swh_objects.append(SWHObjectInfo(object_type=RELEASE, object_id=release_id)) browse_rel_link = gen_release_link(release_id) revision_metadata["release"] = release_id revision_metadata["context-independent release"] = browse_rel_link diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -28,7 +28,7 @@ from swh.web.common import query, service, highlightjs from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.common.identifiers import get_swh_persistent_ids -from swh.web.common.typing import ContentMetadata +from swh.web.common.typing import ContentMetadata, SWHObjectInfo from swh.web.common.utils import reverse, gen_path_info, swh_object_icons @@ -287,6 +287,8 @@ return handle_view_exception(request, exc) elif root_dir != path: directory_id = root_dir + else: + root_dir = None if directory_id: directory_url = gen_directory_link(directory_id) @@ -308,6 +310,7 @@ content_metadata = ContentMetadata( object_type=CONTENT, + object_id=content_checksums["sha1_git"], sha1=content_checksums["sha1"], sha1_git=content_checksums["sha1_git"], sha256=content_checksums["sha256"], @@ -318,8 +321,9 @@ size=filesizeformat(content_data["length"]), language=content_data["language"], licenses=content_data["licenses"], - path=path, - filename=filename, + root_directory=root_dir, + path=f"/{path}" if path else "", + filename=filename or "", directory=directory_id, directory_url=directory_url, revision=None, @@ -329,7 +333,8 @@ ) swh_ids = get_swh_persistent_ids( - [{"type": "content", "id": content_checksums["sha1_git"]}] + [SWHObjectInfo(object_type=CONTENT, object_id=content_checksums["sha1_git"])], + extra_context=content_metadata, ) heading = "Content - %s" % content_checksums["sha1_git"] @@ -342,7 +347,7 @@ "browse/content.html", { "heading": heading, - "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_id": swh_ids[0]["swhid"], "swh_object_name": "Content", "swh_object_metadata": content_metadata, "content": content, diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -22,7 +22,7 @@ from swh.web.common import service from swh.web.common.exc import handle_view_exception, NotFoundExc from swh.web.common.identifiers import get_swh_persistent_ids -from swh.web.common.typing import DirectoryMetadata +from swh.web.common.typing import DirectoryMetadata, SWHObjectInfo from swh.web.common.utils import reverse, gen_path_info @@ -130,11 +130,13 @@ dir_metadata = DirectoryMetadata( object_type=DIRECTORY, + object_id=sha1_git, directory=sha1_git, nb_files=len(files), nb_dirs=len(dirs), sum_file_sizes=sum_file_sizes, - path=path or None, + root_directory=root_sha1_git, + path=f"/{path}" if path else "", revision=None, revision_found=None, release=None, @@ -148,11 +150,9 @@ "revision_id": None, } - swh_objects = [{"type": "directory", "id": sha1_git}] + swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)] - swh_ids = get_swh_persistent_ids( - swh_objects=swh_objects, snapshot_context=snapshot_context - ) + swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context, dir_metadata) heading = "Directory - %s" % sha1_git if breadcrumbs: @@ -164,7 +164,7 @@ "browse/directory.html", { "heading": heading, - "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_id": swh_ids[0]["swhid"], "swh_object_name": "Directory", "swh_object_metadata": dir_metadata, "dirs": dirs, diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -21,7 +21,7 @@ from swh.web.common import service from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.common.identifiers import get_swh_persistent_ids -from swh.web.common.typing import ReleaseMetadata +from swh.web.common.typing import ReleaseMetadata, SWHObjectInfo from swh.web.common.utils import reverse, format_utc_iso_date @@ -89,6 +89,7 @@ release_metadata = ReleaseMetadata( object_type=RELEASE, + object_id=sha1_git, release=sha1_git, release_url=gen_release_link(release["id"]), author=release["author"]["fullname"] if release["author"] else "None", @@ -192,13 +193,13 @@ release["directory_link"] = directory_link release["target_link"] = target_link - swh_objects = [{"type": RELEASE, "id": sha1_git}] + swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)] if snapshot_context: snapshot_id = snapshot_context["snapshot_id"] if snapshot_id: - swh_objects.append({"type": SNAPSHOT, "id": snapshot_id}) + swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id)) swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) @@ -221,7 +222,7 @@ "browse/release.html", { "heading": heading, - "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_id": swh_ids[0]["swhid"], "swh_object_name": "Release", "swh_object_metadata": release_metadata, "release": release, diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -12,7 +12,13 @@ from django.template.defaultfilters import filesizeformat from django.utils.safestring import mark_safe -from swh.model.identifiers import persistent_identifier, REVISION +from swh.model.identifiers import ( + persistent_identifier, + CONTENT, + DIRECTORY, + REVISION, + SNAPSHOT, +) from swh.web.browse.browseurls import browse_route from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( @@ -33,7 +39,7 @@ from swh.web.common import service from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.common.identifiers import get_swh_persistent_ids -from swh.web.common.typing import RevisionMetadata +from swh.web.common.typing import RevisionMetadata, SWHObjectInfo from swh.web.common.utils import ( reverse, format_utc_iso_date, @@ -353,6 +359,7 @@ revision_metadata = RevisionMetadata( object_type=REVISION, + object_id=sha1_git, revision=sha1_git, revision_url=gen_revision_link(sha1_git), author=revision["author"]["fullname"] if revision["author"] else "None", @@ -430,7 +437,7 @@ "revision_id": sha1_git, } - swh_objects = [{"type": "revision", "id": sha1_git}] + swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)] content = None content_size = None @@ -445,6 +452,10 @@ error_message = "" error_description = "" + extra_context = dict(revision_metadata) + if path: + extra_context["path"] = f"/{path}" + if content_data: breadcrumbs[-1]["url"] = None content_size = content_data["length"] @@ -460,6 +471,7 @@ if path: filename = path_info[-1]["name"] query_params["filename"] = filename + extra_context["filename"] = filename top_right_link = { "url": reverse( @@ -471,7 +483,9 @@ "text": "Raw File", } - swh_objects.append({"type": "content", "id": file_info["target"]}) + swh_objects.append( + SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]) + ) error_code = content_data["error_code"] error_message = content_data["error_message"] @@ -512,7 +526,7 @@ vault_cooking["directory_context"] = True vault_cooking["directory_id"] = dir_id - swh_objects.append({"type": "directory", "id": dir_id}) + swh_objects.append(SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id)) diff_revision_url = reverse( "diff-revision", @@ -525,9 +539,9 @@ ) if snapshot_id: - swh_objects.append({"type": "snapshot", "id": snapshot_id}) + swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id)) - swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context) + swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context, extra_context) heading = "Revision - %s - %s" % ( sha1_git[:7], @@ -544,7 +558,7 @@ "browse/revision.html", { "heading": heading, - "swh_object_id": swh_ids[0]["swh_id"], + "swh_object_id": swh_ids[0]["swhid"], "swh_object_name": "Revision", "swh_object_metadata": revision_metadata, "message_header": message_lines[0], diff --git a/swh/web/common/identifiers.py b/swh/web/common/identifiers.py --- a/swh/web/common/identifiers.py +++ b/swh/web/common/identifiers.py @@ -3,7 +3,8 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Dict, Iterable, List, Optional +from urllib.parse import quote +from typing import Any, Dict, Iterable, List, Optional from typing_extensions import TypedDict from django.http import QueryDict @@ -23,12 +24,21 @@ ) from swh.web.common.exc import BadInputExc -from swh.web.common.typing import QueryParameters -from swh.web.common.utils import swh_object_icons, reverse +from swh.web.common.typing import ( + QueryParameters, + SnapshotContext, + SWHObjectInfo, + SWHIDInfo, + SWHIDContext, +) +from swh.web.common.utils import reverse def get_swh_persistent_id( - object_type: str, object_id: str, scheme_version: int = 1 + object_type: str, + object_id: str, + scheme_version: int = 1, + metadata: SWHIDContext = {}, ) -> str: """ Returns the persistent identifier for a swh object based on: @@ -53,7 +63,7 @@ generate a valid identifier """ try: - swh_id = persistent_identifier(object_type, object_id, scheme_version) + swh_id = persistent_identifier(object_type, object_id, scheme_version, metadata) except ValidationError as e: raise BadInputExc( "Invalid object (%s) for swh persistent id. %s" % (object_id, e) @@ -197,50 +207,114 @@ return pids_by_type -def get_swh_persistent_ids(swh_objects, snapshot_context=None): +def get_swh_persistent_ids( + swh_objects: Iterable[SWHObjectInfo], + snapshot_context: Optional[SnapshotContext] = None, + extra_context: Optional[Dict[str, Any]] = None, +) -> List[SWHIDInfo]: """ Returns a list of dict containing info related to persistent identifiers of swh objects. Args: - swh_objects (list): a list of dict with the following keys: - - * type: swh object type - (content/directory/release/revision/snapshot) - * id: swh object id - - snapshot_context (dict): optional parameter describing the snapshot in - which the object has been found + swh_objects: an iterable of dict describing archived objects + snapshot_context: optional dict parameter describing the snapshot in + which the objects have been found + extra_context: optional dict filled with extra contextual info about + the objects Returns: - list: a list of dict with the following keys: - * object_type: the swh object type - (content/directory/release/revision/snapshot) - * object_icon: the swh object icon to use in HTML views - * swh_id: the computed swh object persistent identifier - * swh_id_url: the url resolving the persistent identifier - * show_options: boolean indicating if the persistent id options - must be displayed in persistent ids HTML view + a list of dict containing persistent identifiers info + """ swh_ids = [] for swh_object in swh_objects: - if not swh_object["id"]: + if not swh_object["object_id"]: + swh_ids.append( + SWHIDInfo( + object_type=swh_object["object_type"], + object_id="", + swhid="", + swhid_url="", + context={}, + swhid_with_context=None, + swhid_with_context_url=None, + ) + ) continue - swh_id = get_swh_persistent_id(swh_object["type"], swh_object["id"]) - show_options = swh_object["type"] == "content" or ( - snapshot_context and snapshot_context["origin_info"] is not None - ) - - object_icon = swh_object_icons[swh_object["type"]] + object_type = swh_object["object_type"] + object_id = swh_object["object_id"] + swhid_context: SWHIDContext = {} + if snapshot_context: + if snapshot_context["origin_info"] is not None: + swhid_context["origin"] = quote( + snapshot_context["origin_info"]["url"], safe="/?:@&" + ) + if object_type != SNAPSHOT: + swhid_context["visit"] = get_swh_persistent_id( + SNAPSHOT, snapshot_context["snapshot_id"] + ) + if object_type not in (RELEASE, REVISION, SNAPSHOT): + if snapshot_context["release_id"] is not None: + swhid_context["anchor"] = get_swh_persistent_id( + RELEASE, snapshot_context["release_id"] + ) + elif snapshot_context["revision_id"] is not None: + swhid_context["anchor"] = get_swh_persistent_id( + REVISION, snapshot_context["revision_id"] + ) + + if object_type in (CONTENT, DIRECTORY): + if ( + extra_context + and "revision" in extra_context + and extra_context["revision"] + ): + swhid_context["anchor"] = get_swh_persistent_id( + REVISION, extra_context["revision"] + ) + elif ( + extra_context + and "root_directory" in extra_context + and extra_context["root_directory"] + and ( + object_type != DIRECTORY + or extra_context["root_directory"] != object_id + ) + ): + swhid_context["anchor"] = get_swh_persistent_id( + DIRECTORY, extra_context["root_directory"] + ) + path = None + if extra_context and "path" in extra_context: + path = extra_context["path"] + if "filename" in extra_context and object_type == CONTENT: + path += extra_context["filename"] + if path: + swhid_context["path"] = quote(path, safe="/?:@&") + + swhid = get_swh_persistent_id(object_type, object_id) + swhid_url = reverse("browse-swh-id", url_args={"swh_id": swhid}) + + swhid_with_context = None + swhid_with_context_url = None + if swhid_context: + swhid_with_context = get_swh_persistent_id( + object_type, object_id, metadata=swhid_context + ) + swhid_with_context_url = reverse( + "browse-swh-id", url_args={"swh_id": swhid_with_context} + ) swh_ids.append( - { - "object_type": swh_object["type"], - "object_id": swh_object["id"], - "object_icon": object_icon, - "swh_id": swh_id, - "swh_id_url": reverse("browse-swh-id", url_args={"swh_id": swh_id}), - "show_options": show_options, - } + SWHIDInfo( + object_type=object_type, + object_id=object_id, + swhid=swhid, + swhid_url=swhid_url, + context=swhid_context, + swhid_with_context=swhid_with_context, + swhid_with_context_url=swhid_with_context_url, + ) ) return swh_ids diff --git a/swh/web/common/typing.py b/swh/web/common/typing.py --- a/swh/web/common/typing.py +++ b/swh/web/common/typing.py @@ -112,8 +112,28 @@ """optional origin visit info associated to the snapshot""" -class SWHObjectMetadata(TypedDict, total=False): +class SWHObjectInfo(TypedDict): object_type: str + object_id: str + + +class SWHIDContext(TypedDict, total=False): + origin: str + anchor: str + visit: str + path: str + lines: str + + +class SWHIDInfo(SWHObjectInfo): + swhid: str + swhid_url: str + context: SWHIDContext + swhid_with_context: Optional[str] + swhid_with_context_url: Optional[str] + + +class SWHObjectInfoMetadata(TypedDict, total=False): origin_url: Optional[str] visit_date: Optional[str] visit_type: Optional[str] @@ -123,7 +143,7 @@ snapshot_url: Optional[str] -class ContentMetadata(SWHObjectMetadata): +class ContentMetadata(SWHObjectInfo, SWHObjectInfoMetadata): sha1: str sha1_git: str sha256: str @@ -137,16 +157,18 @@ path: Optional[str] filename: Optional[str] directory: Optional[str] + root_directory: Optional[str] revision: Optional[str] release: Optional[str] snapshot: Optional[str] -class DirectoryMetadata(SWHObjectMetadata): +class DirectoryMetadata(SWHObjectInfo, SWHObjectInfoMetadata): directory: str nb_files: int nb_dirs: int sum_file_sizes: str + root_directory: Optional[str] path: str revision: Optional[str] revision_found: Optional[bool] @@ -154,7 +176,7 @@ snapshot: Optional[str] -class ReleaseMetadata(SWHObjectMetadata): +class ReleaseMetadata(SWHObjectInfo, SWHObjectInfoMetadata): release: str author: str author_url: str @@ -167,7 +189,7 @@ snapshot: Optional[str] -class RevisionMetadata(SWHObjectMetadata): +class RevisionMetadata(SWHObjectInfo, SWHObjectInfoMetadata): revision: str author: str author_url: str diff --git a/swh/web/templates/includes/show-swh-ids.html b/swh/web/templates/includes/show-swh-ids.html --- a/swh/web/templates/includes/show-swh-ids.html +++ b/swh/web/templates/includes/show-swh-ids.html @@ -1,5 +1,5 @@ {% comment %} -Copyright (C) 2017-2019 The Software Heritage developers +Copyright (C) 2017-2020 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information @@ -30,13 +30,13 @@ {% if forloop.first %} {% else %} {% endif %} @@ -57,11 +57,13 @@ onclick="swh.webapp.showBadgeInfoModal('origin', '{{ snapshot_context.origin_info.url }}')" title="Click to display badge integration info"> {% endif %} - -
{{ swh_id.swh_id }}
- {% if swh_id.show_options %} + {% if swh_id.object_id %} + +
{{ swh_id.swhid }}
+ {% endif %} + {% if swh_id.swhid_with_context is not None %}
{% if snapshot_context and snapshot_context.origin_info %} diff --git a/swh/web/tests/common/test_identifiers.py b/swh/web/tests/common/test_identifiers.py --- a/swh/web/tests/common/test_identifiers.py +++ b/swh/web/tests/common/test_identifiers.py @@ -3,6 +3,8 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +import random + from hypothesis import given import pytest @@ -23,10 +25,22 @@ resolve_swh_persistent_id, get_persistent_identifier, group_swh_persistent_identifiers, + get_swh_persistent_ids, ) +from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.common.utils import reverse +from swh.web.common.typing import SWHObjectInfo from swh.web.tests.data import random_sha1 -from swh.web.tests.strategies import content, directory, release, revision, snapshot +from swh.web.tests.strategies import ( + content, + directory, + release, + revision, + snapshot, + origin, + origin_with_multiple_visits, + directory_with_subdirs, +) @given(content()) @@ -119,3 +133,287 @@ pid_groups = group_swh_persistent_identifiers(swh_pids) assert pid_groups == expected + + +@given(directory_with_subdirs()) +def test_get_swh_persistent_ids_directory_context(archive_data, directory): + extra_context = {"path": "/"} + swhid = get_swh_persistent_ids( + [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)], + snapshot_context=None, + extra_context=extra_context, + )[0] + swhid_dir_parsed = get_persistent_identifier(swhid["swhid_with_context"]) + + assert swhid_dir_parsed.metadata == extra_context + + dir_content = archive_data.directory_ls(directory) + dir_subdirs = [e for e in dir_content if e["type"] == "dir"] + dir_subdir = random.choice(dir_subdirs) + dir_subdir_path = f'/{dir_subdir["name"]}/' + + dir_subdir_content = archive_data.directory_ls(dir_subdir["target"]) + dir_subdir_files = [e for e in dir_subdir_content if e["type"] == "file"] + dir_subdir_file = random.choice(dir_subdir_files) + + extra_context = { + "root_directory": directory, + "path": dir_subdir_path, + "filename": dir_subdir_file["name"], + } + swhids = get_swh_persistent_ids( + [ + SWHObjectInfo(object_type=DIRECTORY, object_id=dir_subdir["target"]), + SWHObjectInfo( + object_type=CONTENT, object_id=dir_subdir_file["checksums"]["sha1_git"] + ), + ], + snapshot_context=None, + extra_context=extra_context, + ) + swhid_dir_parsed = get_persistent_identifier(swhids[0]["swhid_with_context"]) + swhid_cnt_parsed = get_persistent_identifier(swhids[1]["swhid_with_context"]) + + anchor = get_swh_persistent_id(DIRECTORY, directory) + + assert swhid_dir_parsed.metadata == { + "anchor": anchor, + "path": dir_subdir_path, + } + + assert swhid_cnt_parsed.metadata == { + "anchor": anchor, + "path": f'{dir_subdir_path}{dir_subdir_file["name"]}', + } + + +@given(revision()) +def test_get_swh_persistent_ids_revision_context(archive_data, revision): + revision_data = archive_data.revision_get(revision) + directory = revision_data["directory"] + dir_content = archive_data.directory_ls(directory) + dir_entry = random.choice(dir_content) + + swh_objects = [ + SWHObjectInfo(object_type=REVISION, object_id=revision), + SWHObjectInfo(object_type=DIRECTORY, object_id=directory), + ] + + extra_context = {"revision": revision, "path": "/"} + if dir_entry["type"] == "file": + swh_objects.append( + SWHObjectInfo( + object_type=CONTENT, object_id=dir_entry["checksums"]["sha1_git"] + ) + ) + extra_context["filename"] = dir_entry["name"] + + swhids = get_swh_persistent_ids( + swh_objects, snapshot_context=None, extra_context=extra_context, + ) + + assert swhids[0]["context"] == {} + swhid_dir_parsed = get_persistent_identifier(swhids[1]["swhid_with_context"]) + + anchor = get_swh_persistent_id(REVISION, revision) + + assert swhid_dir_parsed.metadata == { + "anchor": anchor, + "path": "/", + } + + if dir_entry["type"] == "file": + swhid_cnt_parsed = get_persistent_identifier(swhids[2]["swhid_with_context"]) + assert swhid_cnt_parsed.metadata == { + "anchor": anchor, + "path": f'/{dir_entry["name"]}', + } + + +@given(origin_with_multiple_visits()) +def test_get_swh_persistent_ids_origin_snapshot_context(archive_data, origin): + """ + Test SWHIDs with contextual info computation under a variety of origin / snapshot + browsing contexts. + """ + + visits = archive_data.origin_visit_get(origin["url"]) + + for visit in visits: + snapshot = archive_data.snapshot_get(visit["snapshot"]) + snapshot_id = snapshot["id"] + branches = { + k: v["target"] + for k, v in snapshot["branches"].items() + if v["target_type"] == "revision" + } + releases = { + k: v["target"] + for k, v in snapshot["branches"].items() + if v["target_type"] == "release" + } + head_rev_id = archive_data.snapshot_get_head(snapshot) + head_rev = archive_data.revision_get(head_rev_id) + root_dir = head_rev["directory"] + dir_content = archive_data.directory_ls(root_dir) + dir_files = [e for e in dir_content if e["type"] == "file"] + dir_file = random.choice(dir_files) + revision_log = [r["id"] for r in archive_data.revision_log(head_rev_id)] + + branch_name = random.choice(list(branches)) + release = random.choice(list(releases)) + release_data = archive_data.release_get(releases[release]) + release_name = release_data["name"] + revision_id = random.choice(revision_log) + + for snp_ctx_params, anchor_info in ( + ( + {"snapshot_id": snapshot_id}, + {"anchor_type": REVISION, "anchor_id": head_rev_id}, + ), + ( + {"snapshot_id": snapshot_id, "branch_name": branch_name}, + {"anchor_type": REVISION, "anchor_id": branches[branch_name]}, + ), + ( + {"snapshot_id": snapshot_id, "release_name": release_name}, + {"anchor_type": RELEASE, "anchor_id": releases[release]}, + ), + ( + {"snapshot_id": snapshot_id, "revision_id": revision_id}, + {"anchor_type": REVISION, "anchor_id": revision_id}, + ), + ( + {"origin_url": origin["url"], "snapshot_id": snapshot_id}, + {"anchor_type": REVISION, "anchor_id": head_rev_id}, + ), + ( + { + "origin_url": origin["url"], + "snapshot_id": snapshot_id, + "branch_name": branch_name, + }, + {"anchor_type": REVISION, "anchor_id": branches[branch_name]}, + ), + ( + { + "origin_url": origin["url"], + "snapshot_id": snapshot_id, + "release_name": release_name, + }, + {"anchor_type": RELEASE, "anchor_id": releases[release]}, + ), + ( + { + "origin_url": origin["url"], + "snapshot_id": snapshot_id, + "revision_id": revision_id, + }, + {"anchor_type": REVISION, "anchor_id": revision_id}, + ), + ): + + snapshot_context = get_snapshot_context(**snp_ctx_params) + + rev_id = head_rev_id + if "branch_name" in snp_ctx_params: + rev_id = branches[branch_name] + elif "release_name" in snp_ctx_params: + rev_id = release_data["target"] + elif "revision_id" in snp_ctx_params: + rev_id = revision_id + + swh_objects = [ + SWHObjectInfo( + object_type=CONTENT, object_id=dir_file["checksums"]["sha1_git"] + ), + SWHObjectInfo(object_type=DIRECTORY, object_id=root_dir), + SWHObjectInfo(object_type=REVISION, object_id=rev_id), + SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id), + ] + + if "release_name" in snp_ctx_params: + swh_objects.append( + SWHObjectInfo(object_type=RELEASE, object_id=release_data["id"]) + ) + + swhids = get_swh_persistent_ids( + swh_objects, + snapshot_context, + extra_context={"path": "/", "filename": dir_file["name"]}, + ) + + swhid_cnt_parsed = get_persistent_identifier( + swhids[0]["swhid_with_context"] + ) + swhid_dir_parsed = get_persistent_identifier( + swhids[1]["swhid_with_context"] + ) + swhid_rev_parsed = get_persistent_identifier( + swhids[2]["swhid_with_context"] + ) + + swhid_snp_parsed = get_persistent_identifier( + swhids[3]["swhid_with_context"] or swhids[3]["swhid"] + ) + + swhid_rel_parsed = None + if "release_name" in snp_ctx_params: + swhid_rel_parsed = get_persistent_identifier( + swhids[4]["swhid_with_context"] + ) + + anchor = get_swh_persistent_id( + object_type=anchor_info["anchor_type"], + object_id=anchor_info["anchor_id"], + ) + + snapshot_swhid = get_swh_persistent_id( + object_type=SNAPSHOT, object_id=snapshot_id + ) + + expected_cnt_context = { + "visit": snapshot_swhid, + "anchor": anchor, + "path": f'/{dir_file["name"]}', + } + + expected_dir_context = { + "visit": snapshot_swhid, + "anchor": anchor, + "path": "/", + } + + expected_rev_context = {"visit": snapshot_swhid} + + expected_snp_context = {} + + if "origin_url" in snp_ctx_params: + expected_cnt_context["origin"] = origin["url"] + expected_dir_context["origin"] = origin["url"] + expected_rev_context["origin"] = origin["url"] + expected_snp_context["origin"] = origin["url"] + + assert swhid_cnt_parsed.metadata == expected_cnt_context + assert swhid_dir_parsed.metadata == expected_dir_context + assert swhid_rev_parsed.metadata == expected_rev_context + assert swhid_snp_parsed.metadata == expected_snp_context + + if "release_name" in snp_ctx_params: + assert swhid_rel_parsed.metadata == expected_rev_context + + +@given(origin(), directory()) +def test_get_swh_persistent_ids_path_encoding(archive_data, origin, directory): + snapshot_context = get_snapshot_context(origin_url=origin["url"]) + snapshot_context["origin_info"]["url"] = "http://example.org/?project=abc;def%" + path = "/foo;/bar%" + + swhid = get_swh_persistent_ids( + [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)], + snapshot_context=snapshot_context, + extra_context={"path": path}, + )[0] + + assert swhid["context"]["origin"] == "http://example.org/?project%3Dabc%3Bdef%25" + assert swhid["context"]["path"] == "/foo%3B/bar%25"