# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import hashlib
import json
import textwrap

from django.http import JsonResponse
from django.shortcuts import render
from django.utils.safestring import mark_safe

from swh.model.hashutil import hash_to_bytes
from swh.model.swhids import CoreSWHID, ObjectType
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.browse.utils import (
    content_display_max_size,
    format_log_entries,
    gen_link,
    gen_person_mail_link,
    gen_revision_url,
    get_directory_entries,
    get_readme_to_display,
    get_revision_log_url,
    prepare_content_for_display,
    request_content,
)
from swh.web.common import archive
from swh.web.common.exc import NotFoundExc, http_status_code_message
from swh.web.common.identifiers import get_swhids_info
from swh.web.common.typing import RevisionMetadata, SWHObjectInfo
from swh.web.common.utils import (
    format_utc_iso_date,
    gen_path_info,
    reverse,
    swh_object_icons,
)


def _gen_content_url(revision, query_string, path, snapshot_context):
    if snapshot_context:
        query_params = snapshot_context["query_params"]
        query_params["path"] = path
        query_params["revision"] = revision["id"]
        content_url = reverse("browse-origin-content", query_params=query_params)
    else:
        content_path = "%s/%s" % (revision["directory"], path)
        content_url = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={"path": content_path},
        )
    return content_url
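

# Illustrative note (assumption, not part of the original source): depending on
# whether a snapshot/origin context is available, _gen_content_url() above
# reverses one of two named URL patterns, roughly:
#
#   with context    -> reverse("browse-origin-content",
#                              query_params={..., "path": path, "revision": rev_id})
#   without context -> reverse("browse-content",
#                              url_args={"query_string": "sha1_git:<content id>"},
#                              query_params={"path": "<root dir id>/<path>"})
#
# The exact URL strings produced depend on the swh-web URL configuration.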


def _gen_diff_link(idx, diff_anchor, link_text):
    if idx < _max_displayed_file_diffs:
        return gen_link(diff_anchor, link_text)
    else:
        return link_text


# TODO: put in conf
_max_displayed_file_diffs = 1000


def _gen_revision_changes_list(revision, changes, snapshot_context):
    """
    Returns an HTML string describing the file changes
    introduced in a revision.
    As this string will be displayed in the browse revision view,
    links to the relevant file diffs are also generated.

    Args:
        revision (str): hexadecimal representation of a revision identifier
        changes (list): list of file changes in the revision
        snapshot_context (dict): optional origin context used to reverse
            the content urls

    Returns:
        A string to insert in a revision HTML view.
    """
    changes_msg = []
    for i, change in enumerate(changes):
        hasher = hashlib.sha1()
        from_query_string = ""
        to_query_string = ""
        diff_id = "diff-"
        if change["from"]:
            from_query_string = "sha1_git:" + change["from"]["target"]
            diff_id += change["from"]["target"] + "-" + change["from_path"]
        diff_id += "-"
        if change["to"]:
            to_query_string = "sha1_git:" + change["to"]["target"]
            diff_id += change["to"]["target"] + change["to_path"]
        change["path"] = change["to_path"] or change["from_path"]
        url_args = {
            "from_query_string": from_query_string,
            "to_query_string": to_query_string,
        }
        query_params = {"path": change["path"]}
        change["diff_url"] = reverse(
            "diff-contents", url_args=url_args, query_params=query_params
        )
        hasher.update(diff_id.encode("utf-8"))
        diff_id = hasher.hexdigest()
        change["id"] = diff_id
        diff_link = "#diff_" + diff_id
        if change["type"] == "modify":
            change["content_url"] = _gen_content_url(
                revision, to_query_string, change["to_path"], snapshot_context
            )
            changes_msg.append(
                "modified: %s" % _gen_diff_link(i, diff_link, change["to_path"])
            )
        elif change["type"] == "insert":
            change["content_url"] = _gen_content_url(
                revision, to_query_string, change["to_path"], snapshot_context
            )
            changes_msg.append(
                "new file: %s" % _gen_diff_link(i, diff_link, change["to_path"])
            )
        elif change["type"] == "delete":
            parent = archive.lookup_revision(revision["parents"][0])
            change["content_url"] = _gen_content_url(
                parent, from_query_string, change["from_path"], snapshot_context
            )
            changes_msg.append(
                "deleted: %s" % _gen_diff_link(i, diff_link, change["from_path"])
            )
        elif change["type"] == "rename":
            change["content_url"] = _gen_content_url(
                revision, to_query_string, change["to_path"], snapshot_context
            )
            link_text = change["from_path"] + " &rarr; " + change["to_path"]
            changes_msg.append(
                "renamed: %s" % _gen_diff_link(i, diff_link, link_text)
            )
    if not changes:
        changes_msg.append("No changes")
    return mark_safe("\n".join(changes_msg))
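

# Illustrative note (assumption): each entry appended to changes_msg by
# _gen_revision_changes_list() above is a short HTML fragment whose link points
# at the corresponding diff panel anchor, e.g. something like
#
#   modified: <a href="#diff_3f786850e387550fdab836ed7e6dc881de23001b">README.md</a>
#
# where the anchor id is the sha1 hex digest computed above from the from/to
# targets and paths. The exact markup depends on gen_link().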


@browse_route(
    r"revision/(?P<sha1_git>[0-9a-f]+)/diff/",
    view_name="diff-revision",
    checksum_args=["sha1_git"],
)
def _revision_diff(request, sha1_git):
    """
    Browse internal endpoint to compute revision diff
    """
    revision = archive.lookup_revision(sha1_git)
    snapshot_context = None
    origin_url = request.GET.get("origin_url", None)
    if not origin_url:
        origin_url = request.GET.get("origin", None)
    timestamp = request.GET.get("timestamp", None)
    visit_id = request.GET.get("visit_id", None)
    if origin_url:
        snapshot_context = get_snapshot_context(
            origin_url=origin_url, timestamp=timestamp, visit_id=visit_id
        )
    changes = archive.diff_revision(sha1_git)
    changes_msg = _gen_revision_changes_list(revision, changes, snapshot_context)
    diff_data = {
        "total_nb_changes": len(changes),
        "changes": changes[:_max_displayed_file_diffs],
        "changes_msg": changes_msg,
    }
    return JsonResponse(diff_data)
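

# Illustrative note (assumption): _revision_diff() above replies with a JSON
# payload shaped like the dict it builds, roughly
#
#   {
#       "total_nb_changes": 2,
#       "changes": [{"type": "modify", "path": "...", "diff_url": "...", ...}, ...],
#       "changes_msg": "modified: <a href=...>...</a>\nnew file: ..."
#   }
#
# with "changes" truncated to the first _max_displayed_file_diffs entries.
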
NB_LOG_ENTRIES = 100


@browse_route(
    r"revision/(?P<sha1_git>[0-9a-f]+)/log/",
    view_name="browse-revision-log",
    checksum_args=["sha1_git"],
)
def revision_log_browse(request, sha1_git):
    """
    Django view that produces an HTML display of the history
    log for a revision identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/log/`
    """
    origin_url = request.GET.get("origin_url")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url or snapshot_id:
        visit_id = int(request.GET.get("visit_id", 0))
        snapshot_context = get_snapshot_context(
            snapshot_id=snapshot_id,
            origin_url=origin_url,
            timestamp=request.GET.get("timestamp"),
            visit_id=visit_id or None,
            branch_name=request.GET.get("branch"),
            release_name=request.GET.get("release"),
            revision_id=sha1_git,
        )
    per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES))
    offset = int(request.GET.get("offset", 0))
    revs_ordering = request.GET.get("revs_ordering", "committer_date")
    session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering)
    rev_log_session = request.session.get(session_key, None)
    rev_log = []
    revs_walker_state = None
    if rev_log_session:
        rev_log = rev_log_session["rev_log"]
        revs_walker_state = rev_log_session["revs_walker_state"]
    if len(rev_log) < offset + per_page:
        revs_walker = archive.get_revisions_walker(
            revs_ordering,
            sha1_git,
            max_revs=offset + per_page + 1,
            state=revs_walker_state,
        )
        rev_log += [rev["id"] for rev in revs_walker]
        revs_walker_state = revs_walker.export_state()
    revs = rev_log[offset : offset + per_page]
    revision_log = archive.lookup_revision_multiple(revs)
    request.session[session_key] = {
        "rev_log": rev_log,
        "revs_walker_state": revs_walker_state,
    }
    revs_ordering = request.GET.get("revs_ordering", "")
    prev_log_url = None
    if len(rev_log) > offset + per_page:
        prev_log_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": sha1_git},
            query_params={
                "per_page": per_page,
                "offset": offset + per_page,
                "revs_ordering": revs_ordering or None,
            },
        )
    next_log_url = None
    if offset != 0:
        next_log_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": sha1_git},
            query_params={
                "per_page": per_page,
                "offset": offset - per_page,
                "revs_ordering": revs_ordering or None,
            },
        )
    revision_log_data = format_log_entries(revision_log, per_page)
    swh_rev_id = str(
        CoreSWHID(object_type=ObjectType.REVISION, object_id=hash_to_bytes(sha1_git))
    )
    return render(
        request,
        "browse/revision-log.html",
        {
            "heading": "Revision history",
            "swh_object_id": swh_rev_id,
            "swh_object_name": "Revisions history",
            "swh_object_metadata": None,
            "revision_log": revision_log_data,
            "revs_ordering": revs_ordering,
            "next_log_url": next_log_url,
            "prev_log_url": prev_log_url,
            "breadcrumbs": None,
            "top_right_link": None,
            "snapshot_context": snapshot_context,
            "vault_cooking": None,
            "show_actions": True,
            "swhids_info": None,
        },
    )
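

# Illustrative note (assumption): revision_log_browse() above pages through the
# history by caching both the list of walked revision ids and the revisions
# walker state in the Django session, keyed by revision id and ordering, so that
# requesting a deeper page (e.g. ?offset=200&per_page=100) resumes the walk
# instead of recomputing it from the root revision.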


@browse_route(
    r"revision/(?P<sha1_git>[0-9a-f]+)/",
    view_name="browse-revision",
    checksum_args=["sha1_git"],
)
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = int(request.GET.get("visit_id", 0))
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id or None,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision", url_args={"sha1_git": sha1_git})
            error_message = (
                "The Software Heritage archive has a revision "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the revision "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_rev_url))
            )
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)
    error_info = {"status_code": 200, "description": None}
    if path:
        try:
            file_info = archive.lookup_directory_with_path(revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]
    if dir_id:
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)
    revision_metadata = RevisionMetadata(
        object_type=ObjectType.REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"] if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"]
        else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"]
        else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"]
        else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(
            revision["metadata"], sort_keys=True, indent=4, separators=(",", ": ")
        ),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )
    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")
    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})
    path_info = gen_path_info(path)
    query_params = snapshot_context["query_params"] if snapshot_context else {}
    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": revision["directory"][:7],
            "url": reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        }
    )
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                ),
            }
        )
    vault_cooking = {
        "directory_context": False,
        "directory_swhid": None,
        "revision_context": True,
        "revision_swhid": f"swh:1:rev:{sha1_git}",
    }
    swh_objects = [SWHObjectInfo(object_type=ObjectType.REVISION, object_id=sha1_git)]
    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}
    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None
    if content_data:
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path
            )
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename
        top_right_link = {
            "url": reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon": swh_object_icons["content"],
            "text": "Raw File",
        }
        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.CONTENT, object_id=file_info["target"])
        )
    else:
        for d in dirs:
            if d["type"] == "rev":
                d["url"] = reverse(
                    "browse-revision", url_args={"sha1_git": d["target"]}
                )
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]
        readme_name, readme_url, readme_html = get_readme_to_display(readmes)
        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }
        vault_cooking["directory_context"] = True
        vault_cooking["directory_swhid"] = f"swh:1:dir:{dir_id}"
        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.DIRECTORY, object_id=dir_id)
        )
    query_params.pop("path", None)
    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )
    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id)
        )
    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)
    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found
    return render(
        request,
        "browse/revision.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Revision",
            "swh_object_metadata": revision_metadata,
            "message_header": message_lines[0],
            "message_body": "\n".join(message_lines[1:]),
            "parents": parents,
            "snapshot_context": snapshot_context,
            "dirs": dirs,
            "files": files,
            "content": content,
            "content_size": content_size,
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "vault_cooking": vault_cooking,
            "diff_revision_url": diff_revision_url,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
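

# Illustrative usage sketch (assumption, not part of the original module): in a
# Django test environment with swh-web configured, these views could be
# exercised through the test client and the reverse() helper imported above,
# for example:
#
#     from django.test import Client
#
#     client = Client()
#     rev_id = "0123456789abcdef0123456789abcdef01234567"  # hypothetical sha1_git
#     url = reverse("browse-revision", url_args={"sha1_git": rev_id})
#     resp = client.get(url)  # rendered HTML revision view
#     log = client.get(reverse("browse-revision-log", url_args={"sha1_git": rev_id}))
#     diff = client.get(reverse("diff-revision", url_args={"sha1_git": rev_id}))
#
# Responses are rendered templates except for "diff-revision", which returns
# the JSON payload described next to _revision_diff().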
