Page Menu | Home | Software Heritage

D4729.diff
No One | Temporary

D4729.diff

diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py
--- a/swh/web/common/archive.py
+++ b/swh/web/common/archive.py
@@ -8,6 +8,7 @@
import os
import re
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union
+from urllib.parse import urlparse
from swh.model import hashutil
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
@@ -248,6 +249,22 @@
# slash while the url in storage have it (e.g. Debian source package)
else:
origin_urls.append(f"{origin['url']}/")
+ try:
+ # handle case where the "://" character sequence was mangled into ":/"
+ parsed_url = urlparse(origin["url"])
+ if (
+ parsed_url.scheme
+ and not parsed_url.netloc
+ and origin["url"].startswith(f"{parsed_url.scheme}:/")
+ and not origin["url"].startswith(f"{parsed_url.scheme}://")
+ ):
+ origin_urls.append(
+ origin["url"].replace(
+ f"{parsed_url.scheme}:/", f"{parsed_url.scheme}://"
+ )
+ )
+ except Exception:
+ pass
origins = [o for o in storage.origin_get(origin_urls) if o is not None]
if not origins:
msg = "Origin with url %s not found!" % origin["url"]
diff --git a/swh/web/common/identifiers.py b/swh/web/common/identifiers.py
--- a/swh/web/common/identifiers.py
+++ b/swh/web/common/identifiers.py
@@ -116,7 +116,9 @@
query_dict[k] = query_params[k]
if "origin" in swhid_parsed.metadata:
- query_dict["origin_url"] = unquote(swhid_parsed.metadata["origin"])
+ origin_url = unquote(swhid_parsed.metadata["origin"])
+ origin_url = archive.lookup_origin({"url": origin_url})["url"]
+ query_dict["origin_url"] = origin_url
if "anchor" in swhid_parsed.metadata:
anchor_swhid_parsed = get_swhid(swhid_parsed.metadata["anchor"])
diff --git a/swh/web/tests/browse/views/test_identifiers.py b/swh/web/tests/browse/views/test_identifiers.py
--- a/swh/web/tests/browse/views/test_identifiers.py
+++ b/swh/web/tests/browse/views/test_identifiers.py
@@ -9,6 +9,7 @@
from hypothesis import given
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
+from swh.model.model import Origin
from swh.web.common.identifiers import gen_swhid
from swh.web.common.utils import reverse
from swh.web.tests.django_asserts import assert_contains
@@ -123,9 +124,12 @@
@given(content())
-def test_content_id_optional_parts_browse(client, content):
+def test_content_id_optional_parts_browse(client, archive_data, content):
cnt_sha1_git = content["sha1_git"]
origin_url = "https://github.com/user/repo"
+
+ archive_data.origin_add([Origin(url=origin_url)])
+
swhid = gen_swhid(
CONTENT, cnt_sha1_git, metadata={"lines": "4-20", "origin": origin_url},
)
@@ -187,8 +191,9 @@
@given(directory())
-def test_browse_swhid_special_characters_escaping(client, directory):
+def test_browse_swhid_special_characters_escaping(client, archive_data, directory):
origin = "http://example.org/?project=abc;"
+ archive_data.origin_add([Origin(url=origin)])
origin_swhid_escaped = quote(origin, safe="/?:@&")
origin_swhid_url_escaped = quote(origin, safe="/:@;")
swhid = gen_swhid(DIRECTORY, directory, metadata={"origin": origin_swhid_escaped})
diff --git a/swh/web/tests/common/test_archive.py b/swh/web/tests/common/test_archive.py
--- a/swh/web/tests/common/test_archive.py
+++ b/swh/web/tests/common/test_archive.py
@@ -975,6 +975,14 @@
assert origin_info["url"] == deb_origin.url
+def test_lookup_origin_single_slash_after_protocol(archive_data):
+ origin_url = "http://snapshot.debian.org/package/r-base/"
+ malformed_origin_url = "http:/snapshot.debian.org/package/r-base/"
+ archive_data.origin_add([Origin(url=origin_url)])
+ origin_info = archive.lookup_origin({"url": malformed_origin_url})
+ assert origin_info["url"] == origin_url
+
+
@given(snapshot())
def test_lookup_snapshot_branch_name_from_tip_revision(archive_data, snapshot_id):
snapshot = archive_data.snapshot_get(snapshot_id)
diff --git a/swh/web/tests/common/test_identifiers.py b/swh/web/tests/common/test_identifiers.py
--- a/swh/web/tests/common/test_identifiers.py
+++ b/swh/web/tests/common/test_identifiers.py
@@ -19,6 +19,7 @@
SWHID,
parse_swhid,
)
+from swh.model.model import Origin
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.common.exc import BadInputExc
from swh.web.common.identifiers import (
@@ -637,3 +638,13 @@
query_params={"path": dir_subdir_path},
)
assert resolved_swhid["browse_url"] == browse_url
+
+
+@given(directory())
+def test_resolve_swhid_with_malformed_origin_url(archive_data, directory):
+ origin_url = "http://example.org/project/abc"
+ malformed_origin_url = "http:/example.org/project/abc"
+ archive_data.origin_add([Origin(url=origin_url)])
+ swhid = gen_swhid(DIRECTORY, directory, metadata={"origin": malformed_origin_url})
+ resolved_swhid = resolve_swhid(swhid)
+ assert origin_url in resolved_swhid["browse_url"]

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 6:04 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216556

Event Timeline