diff --git a/swh/web/browse/views/utils/snapshot_context.py b/swh/web/browse/snapshot_context.py rename from swh/web/browse/views/utils/snapshot_context.py rename to swh/web/browse/snapshot_context.py --- a/swh/web/browse/views/utils/snapshot_context.py +++ b/swh/web/browse/snapshot_context.py @@ -3,20 +3,20 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -# Utility module implementing Django views for browsing the archive -# in a snapshot context. -# Its purpose is to factorize code for the views reachable from the -# /origin/.* and /snapshot/.* endpoints. +# Utility module for browsing the archive in a snapshot context. +from collections import defaultdict + + +from django.core.cache import cache from django.shortcuts import render from django.template.defaultfilters import filesizeformat from django.utils.html import escape import sentry_sdk -from swh.model.identifiers import snapshot_identifier +from swh.model.identifiers import persistent_identifier, snapshot_identifier from swh.web.browse.utils import ( - get_snapshot_context, get_directory_entries, gen_directory_link, gen_revision_link, @@ -30,17 +30,19 @@ get_readme_to_display, get_swh_persistent_ids, gen_snapshot_link, - process_snapshot_branches, ) from swh.web.common import service, highlightjs from swh.web.common.exc import handle_view_exception, NotFoundExc +from swh.web.common.origin_visits import get_origin_visit from swh.web.common.utils import ( reverse, gen_path_info, format_utc_iso_date, swh_object_icons, ) +from swh.web.config import get_config + _empty_snapshot_id = snapshot_identifier({"branches": {}}) @@ -152,6 +154,329 @@ raise NotFoundExc(escape(msg)) +def process_snapshot_branches(snapshot): + """ + Process a dictionary describing snapshot branches: extract those + targeting revisions and releases, put them in two different lists, + then sort those lists in lexicographical order of the branches' names. + + Args: + snapshot_branches (dict): A dict describing the branches of a snapshot + as returned for instance by + :func:`swh.web.common.service.lookup_snapshot` + + Returns: + tuple: A tuple whose first member is the sorted list of branches + targeting revisions and second member the sorted list of branches + targeting releases + """ + snapshot_branches = snapshot["branches"] + branches = {} + branch_aliases = {} + releases = {} + revision_to_branch = defaultdict(set) + revision_to_release = defaultdict(set) + release_to_branch = defaultdict(set) + for branch_name, target in snapshot_branches.items(): + if not target: + # FIXME: display branches with an unknown target anyway + continue + target_id = target["target"] + target_type = target["target_type"] + if target_type == "revision": + branches[branch_name] = { + "name": branch_name, + "revision": target_id, + } + revision_to_branch[target_id].add(branch_name) + elif target_type == "release": + release_to_branch[target_id].add(branch_name) + elif target_type == "alias": + branch_aliases[branch_name] = target_id + # FIXME: handle pointers to other object types + + def _enrich_release_branch(branch, release): + releases[branch] = { + "name": release["name"], + "branch_name": branch, + "date": format_utc_iso_date(release["date"]), + "id": release["id"], + "message": release["message"], + "target_type": release["target_type"], + "target": release["target"], + } + + def _enrich_revision_branch(branch, revision): + branches[branch].update( + { + "revision": revision["id"], + "directory": revision["directory"], + "date": format_utc_iso_date(revision["date"]), + "message": revision["message"], + } + ) + + releases_info = service.lookup_release_multiple(release_to_branch.keys()) + for release in releases_info: + branches_to_update = release_to_branch[release["id"]] + for branch in branches_to_update: + _enrich_release_branch(branch, release) + if release["target_type"] == "revision": + revision_to_release[release["target"]].update(branches_to_update) + + revisions = service.lookup_revision_multiple( + set(revision_to_branch.keys()) | set(revision_to_release.keys()) + ) + + for revision in revisions: + if not revision: + continue + for branch in revision_to_branch[revision["id"]]: + _enrich_revision_branch(branch, revision) + for release in revision_to_release[revision["id"]]: + releases[release]["directory"] = revision["directory"] + + for branch_alias, branch_target in branch_aliases.items(): + if branch_target in branches: + branches[branch_alias] = dict(branches[branch_target]) + else: + snp = service.lookup_snapshot( + snapshot["id"], branches_from=branch_target, branches_count=1 + ) + if snp and branch_target in snp["branches"]: + + if snp["branches"][branch_target] is None: + continue + + target_type = snp["branches"][branch_target]["target_type"] + target = snp["branches"][branch_target]["target"] + if target_type == "revision": + branches[branch_alias] = snp["branches"][branch_target] + revision = service.lookup_revision(target) + _enrich_revision_branch(branch_alias, revision) + elif target_type == "release": + release = service.lookup_release(target) + _enrich_release_branch(branch_alias, release) + + if branch_alias in branches: + branches[branch_alias]["name"] = branch_alias + + ret_branches = list(sorted(branches.values(), key=lambda b: b["name"])) + ret_releases = list(sorted(releases.values(), key=lambda b: b["name"])) + + return ret_branches, ret_releases + + +def get_snapshot_content(snapshot_id): + """Returns the lists of branches and releases + associated to a swh snapshot. + That list is put in cache in order to speedup the navigation + in the swh-web/browse ui. + + .. warning:: At most 1000 branches contained in the snapshot + will be returned for performance reasons. + + Args: + snapshot_id (str): hexadecimal representation of the snapshot + identifier + + Returns: + A tuple with two members. The first one is a list of dict describing + the snapshot branches. The second one is a list of dict describing the + snapshot releases. + + Raises: + NotFoundExc if the snapshot does not exist + """ + cache_entry_id = "swh_snapshot_%s" % snapshot_id + cache_entry = cache.get(cache_entry_id) + + if cache_entry: + return cache_entry["branches"], cache_entry["releases"] + + branches = [] + releases = [] + + snapshot_content_max_size = get_config()["snapshot_content_max_size"] + + if snapshot_id: + snapshot = service.lookup_snapshot( + snapshot_id, branches_count=snapshot_content_max_size + ) + branches, releases = process_snapshot_branches(snapshot) + + cache.set(cache_entry_id, {"branches": branches, "releases": releases,}) + + return branches, releases + + +def get_origin_visit_snapshot( + origin_info, visit_ts=None, visit_id=None, snapshot_id=None +): + """Returns the lists of branches and releases + associated to a swh origin for a given visit. + The visit is expressed by a timestamp. In the latter case, + the closest visit from the provided timestamp will be used. + If no visit parameter is provided, it returns the list of branches + found for the latest visit. + That list is put in cache in order to speedup the navigation + in the swh-web/browse ui. + + .. warning:: At most 1000 branches contained in the snapshot + will be returned for performance reasons. + + Args: + origin_info (dict): a dict filled with origin information + (id, url, type) + visit_ts (int or str): an ISO date string or Unix timestamp to parse + visit_id (int): optional visit id for disambiguation in case + several visits have the same timestamp + + Returns: + A tuple with two members. The first one is a list of dict describing + the origin branches for the given visit. + The second one is a list of dict describing the origin releases + for the given visit. + + Raises: + NotFoundExc if the origin or its visit are not found + """ + + visit_info = get_origin_visit(origin_info, visit_ts, visit_id, snapshot_id) + + return get_snapshot_content(visit_info["snapshot"]) + + +def get_snapshot_context( + snapshot_id=None, origin_url=None, timestamp=None, visit_id=None +): + """ + Utility function to compute relevant information when navigating + the archive in a snapshot context. The snapshot is either + referenced by its id or it will be retrieved from an origin visit. + + Args: + snapshot_id (str): hexadecimal representation of a snapshot identifier, + all other parameters will be ignored if it is provided + origin_url (str): the origin_url + (e.g. https://github.com/(user)/(repo)/) + timestamp (str): a datetime string for retrieving the closest + visit of the origin + visit_id (int): optional visit id for disambiguation in case + of several visits with the same timestamp + + Returns: + A dict with the following entries: + * origin_info: dict containing origin information + * visit_info: dict containing visit information + * branches: the list of branches for the origin found + during the visit + * releases: the list of releases for the origin found + during the visit + * origin_browse_url: the url to browse the origin + * origin_branches_url: the url to browse the origin branches + * origin_releases_url': the url to browse the origin releases + * origin_visit_url: the url to browse the snapshot of the origin + found during the visit + * url_args: dict containing url arguments to use when browsing in + the context of the origin and its visit + + Raises: + swh.web.common.exc.NotFoundExc: if no snapshot is found for the visit + of an origin. + """ + origin_info = None + visit_info = None + url_args = None + query_params = {} + branches = [] + releases = [] + browse_url = None + visit_url = None + branches_url = None + releases_url = None + swh_type = "snapshot" + if origin_url: + swh_type = "origin" + origin_info = service.lookup_origin({"url": origin_url}) + + visit_info = get_origin_visit(origin_info, timestamp, visit_id, snapshot_id) + fmt_date = format_utc_iso_date(visit_info["date"]) + visit_info["fmt_date"] = fmt_date + snapshot_id = visit_info["snapshot"] + + if not snapshot_id: + raise NotFoundExc( + "No snapshot associated to the visit of origin " + "%s on %s" % (escape(origin_url), fmt_date) + ) + + # provided timestamp is not necessarily equals to the one + # of the retrieved visit, so get the exact one in order + # use it in the urls generated below + if timestamp: + timestamp = visit_info["date"] + + branches, releases = get_origin_visit_snapshot( + origin_info, timestamp, visit_id, snapshot_id + ) + + url_args = {"origin_url": origin_info["url"]} + + query_params = {"visit_id": visit_id} + + browse_url = reverse("browse-origin-visits", url_args=url_args) + + if timestamp: + url_args["timestamp"] = format_utc_iso_date(timestamp, "%Y-%m-%dT%H:%M:%S") + visit_url = reverse( + "browse-origin-directory", url_args=url_args, query_params=query_params + ) + visit_info["url"] = visit_url + + branches_url = reverse( + "browse-origin-branches", url_args=url_args, query_params=query_params + ) + + releases_url = reverse( + "browse-origin-releases", url_args=url_args, query_params=query_params + ) + elif snapshot_id: + branches, releases = get_snapshot_content(snapshot_id) + url_args = {"snapshot_id": snapshot_id} + browse_url = reverse("browse-snapshot", url_args=url_args) + branches_url = reverse("browse-snapshot-branches", url_args=url_args) + + releases_url = reverse("browse-snapshot-releases", url_args=url_args) + + releases = list(reversed(releases)) + + snapshot_sizes = service.lookup_snapshot_sizes(snapshot_id) + + is_empty = sum(snapshot_sizes.values()) == 0 + + swh_snp_id = persistent_identifier("snapshot", snapshot_id) + + return { + "swh_type": swh_type, + "swh_object_id": swh_snp_id, + "snapshot_id": snapshot_id, + "snapshot_sizes": snapshot_sizes, + "is_empty": is_empty, + "origin_info": origin_info, + "visit_info": visit_info, + "branches": branches, + "releases": releases, + "branch": None, + "release": None, + "browse_url": browse_url, + "branches_url": branches_url, + "releases_url": releases_url, + "url_args": url_args, + "query_params": query_params, + } + + def _process_snapshot_request( request, snapshot_id=None, diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -8,7 +8,6 @@ import stat import textwrap -from collections import defaultdict from threading import Lock from django.core.cache import cache @@ -16,11 +15,9 @@ from django.utils.html import escape import sentry_sdk -from swh.model.identifiers import persistent_identifier from swh.web.common import highlightjs, service -from swh.web.common.exc import NotFoundExc, http_status_code_message +from swh.web.common.exc import http_status_code_message from swh.web.common.identifiers import get_swh_persistent_id -from swh.web.common.origin_visits import get_origin_visit from swh.web.common.utils import ( reverse, format_utc_iso_date, @@ -115,8 +112,6 @@ # with code highlighting content_display_max_size = get_config()["content_display_max_size"] -snapshot_content_max_size = get_config()["snapshot_content_max_size"] - def _re_encode_content(mimetype, encoding, content_data): # encode textual content to utf-8 if needed @@ -309,197 +304,6 @@ return {"content_data": content_data, "language": language, "mimetype": mime_type} -def process_snapshot_branches(snapshot): - """ - Process a dictionary describing snapshot branches: extract those - targeting revisions and releases, put them in two different lists, - then sort those lists in lexicographical order of the branches' names. - - Args: - snapshot_branches (dict): A dict describing the branches of a snapshot - as returned for instance by - :func:`swh.web.common.service.lookup_snapshot` - - Returns: - tuple: A tuple whose first member is the sorted list of branches - targeting revisions and second member the sorted list of branches - targeting releases - """ - snapshot_branches = snapshot["branches"] - branches = {} - branch_aliases = {} - releases = {} - revision_to_branch = defaultdict(set) - revision_to_release = defaultdict(set) - release_to_branch = defaultdict(set) - for branch_name, target in snapshot_branches.items(): - if not target: - # FIXME: display branches with an unknown target anyway - continue - target_id = target["target"] - target_type = target["target_type"] - if target_type == "revision": - branches[branch_name] = { - "name": branch_name, - "revision": target_id, - } - revision_to_branch[target_id].add(branch_name) - elif target_type == "release": - release_to_branch[target_id].add(branch_name) - elif target_type == "alias": - branch_aliases[branch_name] = target_id - # FIXME: handle pointers to other object types - - def _enrich_release_branch(branch, release): - releases[branch] = { - "name": release["name"], - "branch_name": branch, - "date": format_utc_iso_date(release["date"]), - "id": release["id"], - "message": release["message"], - "target_type": release["target_type"], - "target": release["target"], - } - - def _enrich_revision_branch(branch, revision): - branches[branch].update( - { - "revision": revision["id"], - "directory": revision["directory"], - "date": format_utc_iso_date(revision["date"]), - "message": revision["message"], - } - ) - - releases_info = service.lookup_release_multiple(release_to_branch.keys()) - for release in releases_info: - branches_to_update = release_to_branch[release["id"]] - for branch in branches_to_update: - _enrich_release_branch(branch, release) - if release["target_type"] == "revision": - revision_to_release[release["target"]].update(branches_to_update) - - revisions = service.lookup_revision_multiple( - set(revision_to_branch.keys()) | set(revision_to_release.keys()) - ) - - for revision in revisions: - if not revision: - continue - for branch in revision_to_branch[revision["id"]]: - _enrich_revision_branch(branch, revision) - for release in revision_to_release[revision["id"]]: - releases[release]["directory"] = revision["directory"] - - for branch_alias, branch_target in branch_aliases.items(): - if branch_target in branches: - branches[branch_alias] = dict(branches[branch_target]) - else: - snp = service.lookup_snapshot( - snapshot["id"], branches_from=branch_target, branches_count=1 - ) - if snp and branch_target in snp["branches"]: - - if snp["branches"][branch_target] is None: - continue - - target_type = snp["branches"][branch_target]["target_type"] - target = snp["branches"][branch_target]["target"] - if target_type == "revision": - branches[branch_alias] = snp["branches"][branch_target] - revision = service.lookup_revision(target) - _enrich_revision_branch(branch_alias, revision) - elif target_type == "release": - release = service.lookup_release(target) - _enrich_release_branch(branch_alias, release) - - if branch_alias in branches: - branches[branch_alias]["name"] = branch_alias - - ret_branches = list(sorted(branches.values(), key=lambda b: b["name"])) - ret_releases = list(sorted(releases.values(), key=lambda b: b["name"])) - - return ret_branches, ret_releases - - -def get_snapshot_content(snapshot_id): - """Returns the lists of branches and releases - associated to a swh snapshot. - That list is put in cache in order to speedup the navigation - in the swh-web/browse ui. - - .. warning:: At most 1000 branches contained in the snapshot - will be returned for performance reasons. - - Args: - snapshot_id (str): hexadecimal representation of the snapshot - identifier - - Returns: - A tuple with two members. The first one is a list of dict describing - the snapshot branches. The second one is a list of dict describing the - snapshot releases. - - Raises: - NotFoundExc if the snapshot does not exist - """ - cache_entry_id = "swh_snapshot_%s" % snapshot_id - cache_entry = cache.get(cache_entry_id) - - if cache_entry: - return cache_entry["branches"], cache_entry["releases"] - - branches = [] - releases = [] - - if snapshot_id: - snapshot = service.lookup_snapshot( - snapshot_id, branches_count=snapshot_content_max_size - ) - branches, releases = process_snapshot_branches(snapshot) - - cache.set(cache_entry_id, {"branches": branches, "releases": releases,}) - - return branches, releases - - -def get_origin_visit_snapshot( - origin_info, visit_ts=None, visit_id=None, snapshot_id=None -): - """Returns the lists of branches and releases - associated to a swh origin for a given visit. - The visit is expressed by a timestamp. In the latter case, - the closest visit from the provided timestamp will be used. - If no visit parameter is provided, it returns the list of branches - found for the latest visit. - That list is put in cache in order to speedup the navigation - in the swh-web/browse ui. - - .. warning:: At most 1000 branches contained in the snapshot - will be returned for performance reasons. - - Args: - origin_info (dict): a dict filled with origin information - (id, url, type) - visit_ts (int or str): an ISO date string or Unix timestamp to parse - visit_id (int): optional visit id for disambiguation in case - several visits have the same timestamp - - Returns: - A tuple with two members. The first one is a list of dict describing - the origin branches for the given visit. - The second one is a list of dict describing the origin releases - for the given visit. - - Raises: - NotFoundExc if the origin or its visit are not found - """ - - visit_info = get_origin_visit(origin_info, visit_ts, visit_id, snapshot_id) - - return get_snapshot_content(visit_info["snapshot"]) - - def gen_link(url, link_text=None, link_attrs=None): """ Utility function for generating an HTML link to insert @@ -885,136 +689,6 @@ return revision_log_data -def get_snapshot_context( - snapshot_id=None, origin_url=None, timestamp=None, visit_id=None -): - """ - Utility function to compute relevant information when navigating - the archive in a snapshot context. The snapshot is either - referenced by its id or it will be retrieved from an origin visit. - - Args: - snapshot_id (str): hexadecimal representation of a snapshot identifier, - all other parameters will be ignored if it is provided - origin_url (str): the origin_url - (e.g. https://github.com/(user)/(repo)/) - timestamp (str): a datetime string for retrieving the closest - visit of the origin - visit_id (int): optional visit id for disambiguation in case - of several visits with the same timestamp - - Returns: - A dict with the following entries: - * origin_info: dict containing origin information - * visit_info: dict containing visit information - * branches: the list of branches for the origin found - during the visit - * releases: the list of releases for the origin found - during the visit - * origin_browse_url: the url to browse the origin - * origin_branches_url: the url to browse the origin branches - * origin_releases_url': the url to browse the origin releases - * origin_visit_url: the url to browse the snapshot of the origin - found during the visit - * url_args: dict containing url arguments to use when browsing in - the context of the origin and its visit - - Raises: - swh.web.common.exc.NotFoundExc: if no snapshot is found for the visit - of an origin. - """ - origin_info = None - visit_info = None - url_args = None - query_params = {} - branches = [] - releases = [] - browse_url = None - visit_url = None - branches_url = None - releases_url = None - swh_type = "snapshot" - if origin_url: - swh_type = "origin" - origin_info = service.lookup_origin({"url": origin_url}) - - visit_info = get_origin_visit(origin_info, timestamp, visit_id, snapshot_id) - fmt_date = format_utc_iso_date(visit_info["date"]) - visit_info["fmt_date"] = fmt_date - snapshot_id = visit_info["snapshot"] - - if not snapshot_id: - raise NotFoundExc( - "No snapshot associated to the visit of origin " - "%s on %s" % (escape(origin_url), fmt_date) - ) - - # provided timestamp is not necessarily equals to the one - # of the retrieved visit, so get the exact one in order - # use it in the urls generated below - if timestamp: - timestamp = visit_info["date"] - - branches, releases = get_origin_visit_snapshot( - origin_info, timestamp, visit_id, snapshot_id - ) - - url_args = {"origin_url": origin_info["url"]} - - query_params = {"visit_id": visit_id} - - browse_url = reverse("browse-origin-visits", url_args=url_args) - - if timestamp: - url_args["timestamp"] = format_utc_iso_date(timestamp, "%Y-%m-%dT%H:%M:%S") - visit_url = reverse( - "browse-origin-directory", url_args=url_args, query_params=query_params - ) - visit_info["url"] = visit_url - - branches_url = reverse( - "browse-origin-branches", url_args=url_args, query_params=query_params - ) - - releases_url = reverse( - "browse-origin-releases", url_args=url_args, query_params=query_params - ) - elif snapshot_id: - branches, releases = get_snapshot_content(snapshot_id) - url_args = {"snapshot_id": snapshot_id} - browse_url = reverse("browse-snapshot", url_args=url_args) - branches_url = reverse("browse-snapshot-branches", url_args=url_args) - - releases_url = reverse("browse-snapshot-releases", url_args=url_args) - - releases = list(reversed(releases)) - - snapshot_sizes = service.lookup_snapshot_sizes(snapshot_id) - - is_empty = sum(snapshot_sizes.values()) == 0 - - swh_snp_id = persistent_identifier("snapshot", snapshot_id) - - return { - "swh_type": swh_type, - "swh_object_id": swh_snp_id, - "snapshot_id": snapshot_id, - "snapshot_sizes": snapshot_sizes, - "is_empty": is_empty, - "origin_info": origin_info, - "visit_info": visit_info, - "branches": branches, - "releases": releases, - "branch": None, - "release": None, - "browse_url": browse_url, - "branches_url": branches_url, - "releases_url": releases_url, - "url_args": url_args, - "query_params": query_params, - } - - # list of common readme names ordered by preference # (lower indices have higher priority) _common_readme_names = [ diff --git a/swh/web/browse/views/content.py b/swh/web/browse/views/content.py --- a/swh/web/browse/views/content.py +++ b/swh/web/browse/views/content.py @@ -15,19 +15,19 @@ from swh.model.hashutil import hash_to_hex -from swh.web.common import query, service, highlightjs -from swh.web.common.utils import reverse, gen_path_info, swh_object_icons -from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.browse.browseurls import browse_route +from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( request_content, prepare_content_for_display, content_display_max_size, - get_snapshot_context, get_swh_persistent_ids, gen_link, gen_directory_link, ) -from swh.web.browse.browseurls import browse_route +from swh.web.common import query, service, highlightjs +from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.utils import reverse, gen_path_info, swh_object_icons @browse_route( diff --git a/swh/web/browse/views/directory.py b/swh/web/browse/views/directory.py --- a/swh/web/browse/views/directory.py +++ b/swh/web/browse/views/directory.py @@ -10,18 +10,18 @@ from django.template.defaultfilters import filesizeformat import sentry_sdk -from swh.web.common import service -from swh.web.common.utils import reverse, gen_path_info -from swh.web.common.exc import handle_view_exception, NotFoundExc + +from swh.web.browse.browseurls import browse_route +from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( get_directory_entries, - get_snapshot_context, get_readme_to_display, get_swh_persistent_ids, gen_link, ) - -from swh.web.browse.browseurls import browse_route +from swh.web.common import service +from swh.web.common.exc import handle_view_exception, NotFoundExc +from swh.web.common.utils import reverse, gen_path_info @browse_route( diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py --- a/swh/web/browse/views/origin.py +++ b/swh/web/browse/views/origin.py @@ -5,25 +5,25 @@ from django.shortcuts import render, redirect -from swh.web.common import service -from swh.web.common.origin_visits import get_origin_visits -from swh.web.common.utils import reverse, format_utc_iso_date, parse_timestamp -from swh.web.common.exc import handle_view_exception -from swh.web.browse.utils import get_snapshot_context -from swh.web.browse.browseurls import browse_route -from .utils.snapshot_context import ( +from swh.web.browse.browseurls import browse_route +from swh.web.browse.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, browse_snapshot_branches, browse_snapshot_releases, + get_snapshot_context, ) +from swh.web.common import service +from swh.web.common.exc import handle_view_exception +from swh.web.common.origin_visits import get_origin_visits +from swh.web.common.utils import reverse, format_utc_iso_date, parse_timestamp @browse_route( r"origin/(?P.+)/visit/(?P.+)/directory/", - r"origin/(?P.+)/visit/(?P.+)" "/directory/(?P.+)/", + r"origin/(?P.+)/visit/(?P.+)/directory/(?P.+)/", r"origin/(?P.+)/directory/", r"origin/(?P.+)/directory/(?P.+)/", view_name="browse-origin-directory", @@ -43,7 +43,7 @@ @browse_route( - r"origin/(?P.+)/visit/(?P.+)" "/content/(?P.+)/", + r"origin/(?P.+)/visit/(?P.+)/content/(?P.+)/", r"origin/(?P.+)/content/(?P.+)/", view_name="browse-origin-content", ) diff --git a/swh/web/browse/views/release.py b/swh/web/browse/views/release.py --- a/swh/web/browse/views/release.py +++ b/swh/web/browse/views/release.py @@ -6,13 +6,10 @@ from django.shortcuts import render import sentry_sdk -from swh.web.common import service -from swh.web.common.utils import reverse, format_utc_iso_date -from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.browseurls import browse_route +from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( gen_revision_link, - get_snapshot_context, gen_link, gen_snapshot_link, get_swh_persistent_ids, @@ -21,6 +18,9 @@ gen_release_link, gen_person_mail_link, ) +from swh.web.common import service +from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.utils import reverse, format_utc_iso_date @browse_route( diff --git a/swh/web/browse/views/revision.py b/swh/web/browse/views/revision.py --- a/swh/web/browse/views/revision.py +++ b/swh/web/browse/views/revision.py @@ -14,20 +14,12 @@ from django.utils.safestring import mark_safe from swh.model.identifiers import persistent_identifier -from swh.web.common import service -from swh.web.common.utils import ( - reverse, - format_utc_iso_date, - gen_path_info, - swh_object_icons, -) -from swh.web.common.exc import NotFoundExc, handle_view_exception from swh.web.browse.browseurls import browse_route +from swh.web.browse.snapshot_context import get_snapshot_context from swh.web.browse.utils import ( gen_link, gen_revision_link, gen_revision_url, - get_snapshot_context, get_revision_log_url, get_directory_entries, gen_directory_link, @@ -40,6 +32,14 @@ format_log_entries, gen_person_mail_link, ) +from swh.web.common import service +from swh.web.common.exc import NotFoundExc, handle_view_exception +from swh.web.common.utils import ( + reverse, + format_utc_iso_date, + gen_path_info, + swh_object_icons, +) def _gen_content_url(revision, query_string, path, snapshot_context): diff --git a/swh/web/browse/views/snapshot.py b/swh/web/browse/views/snapshot.py --- a/swh/web/browse/views/snapshot.py +++ b/swh/web/browse/views/snapshot.py @@ -9,7 +9,7 @@ from swh.web.browse.browseurls import browse_route from swh.web.common.utils import reverse -from .utils.snapshot_context import ( +from swh.web.browse.snapshot_context import ( browse_snapshot_directory, browse_snapshot_content, browse_snapshot_log, diff --git a/swh/web/browse/views/utils/__init__.py b/swh/web/browse/views/utils/__init__.py deleted file mode 100644 diff --git a/swh/web/tests/browse/test_utils.py b/swh/web/tests/browse/test_snapshot_context.py copy from swh/web/tests/browse/test_utils.py copy to swh/web/tests/browse/test_snapshot_context.py --- a/swh/web/tests/browse/test_utils.py +++ b/swh/web/tests/browse/test_snapshot_context.py @@ -1,23 +1,15 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import given -from swh.web.browse import utils -from swh.web.common.utils import reverse, format_utc_iso_date +from swh.web.browse.snapshot_context import get_origin_visit_snapshot +from swh.web.common.utils import format_utc_iso_date from swh.web.tests.strategies import origin_with_multiple_visits -def test_get_mimetype_and_encoding_for_content(): - text = b"Hello world!" - assert utils.get_mimetype_and_encoding_for_content(text) == ( - "text/plain", - "us-ascii", - ) - - @given(origin_with_multiple_visits()) def test_get_origin_visit_snapshot_simple(archive_data, origin): visits = archive_data.origin_visit_get(origin["url"]) @@ -66,66 +58,8 @@ assert branches and releases, "Incomplete test data." - origin_visit_branches = utils.get_origin_visit_snapshot( + origin_visit_branches = get_origin_visit_snapshot( origin, visit_id=visit["visit"] ) assert origin_visit_branches == (branches, releases) - - -def test_gen_link(): - assert ( - utils.gen_link("https://www.softwareheritage.org/", "swh") - == 'swh' - ) - - -def test_gen_revision_link(): - revision_id = "28a0bc4120d38a394499382ba21d6965a67a3703" - revision_url = reverse("browse-revision", url_args={"sha1_git": revision_id}) - - assert utils.gen_revision_link( - revision_id, link_text=None, link_attrs=None - ) == '%s' % (revision_url, revision_id) - assert utils.gen_revision_link( - revision_id, shorten_id=True, link_attrs=None - ) == '%s' % (revision_url, revision_id[:7]) - - -def test_gen_person_mail_link(): - person_full = { - "name": "John Doe", - "email": "john.doe@swh.org", - "fullname": "John Doe ", - } - - assert utils.gen_person_mail_link(person_full) == '%s' % ( - person_full["email"], - person_full["name"], - ) - - link_text = "Mail" - assert utils.gen_person_mail_link( - person_full, link_text=link_text - ) == '%s' % (person_full["email"], link_text) - - person_partial_email = {"name": None, "email": None, "fullname": "john.doe@swh.org"} - - assert utils.gen_person_mail_link( - person_partial_email - ) == '%s' % ( - person_partial_email["fullname"], - person_partial_email["fullname"], - ) - - person_partial = { - "name": None, - "email": None, - "fullname": "John Doe ", - } - - assert utils.gen_person_mail_link(person_partial) == person_partial["fullname"] - - person_none = {"name": None, "email": None, "fullname": None} - - assert utils.gen_person_mail_link(person_none) == "None" diff --git a/swh/web/tests/browse/test_utils.py b/swh/web/tests/browse/test_utils.py --- a/swh/web/tests/browse/test_utils.py +++ b/swh/web/tests/browse/test_utils.py @@ -3,79 +3,23 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from hypothesis import given - -from swh.web.browse import utils -from swh.web.common.utils import reverse, format_utc_iso_date -from swh.web.tests.strategies import origin_with_multiple_visits +from swh.web.browse.utils import ( + get_mimetype_and_encoding_for_content, + gen_link, + gen_revision_link, + gen_person_mail_link, +) +from swh.web.common.utils import reverse def test_get_mimetype_and_encoding_for_content(): text = b"Hello world!" - assert utils.get_mimetype_and_encoding_for_content(text) == ( - "text/plain", - "us-ascii", - ) - - -@given(origin_with_multiple_visits()) -def test_get_origin_visit_snapshot_simple(archive_data, origin): - visits = archive_data.origin_visit_get(origin["url"]) - - for visit in visits: - - snapshot = archive_data.snapshot_get(visit["snapshot"]) - branches = [] - releases = [] - - def _process_branch_data(branch, branch_data): - if branch_data["target_type"] == "revision": - rev_data = archive_data.revision_get(branch_data["target"]) - branches.append( - { - "name": branch, - "revision": branch_data["target"], - "directory": rev_data["directory"], - "date": format_utc_iso_date(rev_data["date"]), - "message": rev_data["message"], - } - ) - elif branch_data["target_type"] == "release": - rel_data = archive_data.release_get(branch_data["target"]) - rev_data = archive_data.revision_get(rel_data["target"]) - releases.append( - { - "name": rel_data["name"], - "branch_name": branch, - "date": format_utc_iso_date(rel_data["date"]), - "id": rel_data["id"], - "message": rel_data["message"], - "target_type": rel_data["target_type"], - "target": rel_data["target"], - "directory": rev_data["directory"], - } - ) - - for branch in sorted(snapshot["branches"].keys()): - branch_data = snapshot["branches"][branch] - if branch_data["target_type"] == "alias": - target_data = snapshot["branches"][branch_data["target"]] - _process_branch_data(branch, target_data) - else: - _process_branch_data(branch, branch_data) - - assert branches and releases, "Incomplete test data." - - origin_visit_branches = utils.get_origin_visit_snapshot( - origin, visit_id=visit["visit"] - ) - - assert origin_visit_branches == (branches, releases) + assert get_mimetype_and_encoding_for_content(text) == ("text/plain", "us-ascii",) def test_gen_link(): assert ( - utils.gen_link("https://www.softwareheritage.org/", "swh") + gen_link("https://www.softwareheritage.org/", "swh") == 'swh' ) @@ -84,10 +28,10 @@ revision_id = "28a0bc4120d38a394499382ba21d6965a67a3703" revision_url = reverse("browse-revision", url_args={"sha1_git": revision_id}) - assert utils.gen_revision_link( + assert gen_revision_link( revision_id, link_text=None, link_attrs=None ) == '%s' % (revision_url, revision_id) - assert utils.gen_revision_link( + assert gen_revision_link( revision_id, shorten_id=True, link_attrs=None ) == '%s' % (revision_url, revision_id[:7]) @@ -99,19 +43,19 @@ "fullname": "John Doe ", } - assert utils.gen_person_mail_link(person_full) == '%s' % ( + assert gen_person_mail_link(person_full) == '%s' % ( person_full["email"], person_full["name"], ) link_text = "Mail" - assert utils.gen_person_mail_link( + assert gen_person_mail_link( person_full, link_text=link_text ) == '%s' % (person_full["email"], link_text) person_partial_email = {"name": None, "email": None, "fullname": "john.doe@swh.org"} - assert utils.gen_person_mail_link( + assert gen_person_mail_link( person_partial_email ) == '%s' % ( person_partial_email["fullname"], @@ -124,8 +68,8 @@ "fullname": "John Doe ", } - assert utils.gen_person_mail_link(person_partial) == person_partial["fullname"] + assert gen_person_mail_link(person_partial) == person_partial["fullname"] person_none = {"name": None, "email": None, "fullname": None} - assert utils.gen_person_mail_link(person_none) == "None" + assert gen_person_mail_link(person_none) == "None" diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py --- a/swh/web/tests/browse/views/test_origin.py +++ b/swh/web/tests/browse/views/test_origin.py @@ -7,15 +7,13 @@ import re import string -import swh.web.browse.utils - from django.utils.html import escape from hypothesis import given from swh.model.hashutil import hash_to_bytes from swh.model.model import Snapshot -from swh.web.browse.utils import process_snapshot_branches +from swh.web.browse.snapshot_context import process_snapshot_branches from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import get_swh_persistent_id from swh.web.common.utils import ( @@ -24,6 +22,7 @@ format_utc_iso_date, parse_timestamp, ) +from swh.web.config import get_config from swh.web.tests.data import get_content, random_sha1 from swh.web.tests.django_asserts import assert_contains, assert_template_used from swh.web.tests.strategies import ( @@ -443,19 +442,17 @@ def test_origin_request_errors(client, archive_data, mocker): - mock_snapshot_service = mocker.patch( - "swh.web.browse.views.utils.snapshot_context.service" - ) + mock_snapshot_service = mocker.patch("swh.web.browse.snapshot_context.service") mock_origin_service = mocker.patch("swh.web.browse.views.origin.service") mock_utils_service = mocker.patch("swh.web.browse.utils.service") mock_get_origin_visit_snapshot = mocker.patch( - "swh.web.browse.utils.get_origin_visit_snapshot" + "swh.web.browse.snapshot_context.get_origin_visit_snapshot" ) mock_get_origin_visits = mocker.patch( "swh.web.common.origin_visits.get_origin_visits" ) mock_request_content = mocker.patch( - "swh.web.browse.views.utils.snapshot_context.request_content" + "swh.web.browse.snapshot_context.request_content" ) mock_origin_service.lookup_origin.side_effect = NotFoundExc("origin not found") url = reverse("browse-origin-visits", url_args={"origin_url": "bar"}) @@ -512,7 +509,7 @@ ], [], ) - mock_utils_service.lookup_snapshot_sizes.return_value = { + mock_snapshot_service.lookup_snapshot_sizes.return_value = { "revision": 1, "release": 0, } @@ -564,11 +561,11 @@ ] mock_get_origin_visit_snapshot.side_effect = None mock_get_origin_visit_snapshot.return_value = ([], []) - mock_utils_service.lookup_snapshot_sizes.return_value = { + mock_snapshot_service.lookup_snapshot_sizes.return_value = { "revision": 0, "release": 0, } - mock_utils_service.lookup_origin.return_value = { + mock_snapshot_service.lookup_origin.return_value = { "type": "foo", "url": "bar", "id": 457, @@ -593,7 +590,7 @@ ], [], ) - mock_utils_service.lookup_snapshot_sizes.return_value = { + mock_snapshot_service.lookup_snapshot_sizes.return_value = { "revision": 1, "release": 0, } @@ -610,7 +607,7 @@ assert_contains(resp, "Content not found", status_code=404) mock_get_snapshot_context = mocker.patch( - "swh.web.browse.views.utils.snapshot_context.get_snapshot_context" + "swh.web.browse.snapshot_context.get_snapshot_context" ) mock_get_snapshot_context.side_effect = NotFoundExc("Snapshot not found") @@ -622,9 +619,9 @@ def test_origin_empty_snapshot(client, mocker): - mock_utils_service = mocker.patch("swh.web.browse.utils.service") + mock_utils_service = mocker.patch("swh.web.browse.snapshot_context.service") mock_get_origin_visit_snapshot = mocker.patch( - "swh.web.browse.utils.get_origin_visit_snapshot" + "swh.web.browse.snapshot_context.get_origin_visit_snapshot" ) mock_get_origin_visits = mocker.patch( "swh.web.common.origin_visits.get_origin_visits" @@ -660,9 +657,10 @@ @given(origin_with_releases()) def test_origin_release_browse(client, archive_data, origin): - # for swh.web.browse.utils.get_snapshot_content to only return one branch - snapshot_max_size = swh.web.browse.utils.snapshot_content_max_size - swh.web.browse.utils.snapshot_content_max_size = 1 + # for swh.web.browse.snapshot_context.get_snapshot_content to only return one branch + config = get_config() + snapshot_max_size = int(config["snapshot_content_max_size"]) + config["snapshot_content_max_size"] = 1 try: snapshot = archive_data.snapshot_get_latest(origin["url"]) release = [ @@ -680,7 +678,7 @@ assert_contains(resp, release_data["name"]) assert_contains(resp, release["target"]) finally: - swh.web.browse.utils.snapshot_content_max_size = snapshot_max_size + config["snapshot_content_max_size"] = snapshot_max_size @given(origin_with_releases()) @@ -1084,9 +1082,7 @@ When a snapshot contains a branch or a release alias, pagination links in the branches / releases view should be displayed. """ - mocker.patch( - "swh.web.browse.views.utils.snapshot_context.PER_PAGE", len(revisions) / 2 - ) + mocker.patch("swh.web.browse.snapshot_context.PER_PAGE", len(revisions) / 2) snp_dict = {"branches": {}, "id": hash_to_bytes(random_sha1())} for i in range(len(revisions)): branch = "".join(random.choices(string.ascii_lowercase, k=8))