Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066336
D4729.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D4729.diff
View Options
diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py
--- a/swh/web/common/archive.py
+++ b/swh/web/common/archive.py
@@ -8,6 +8,7 @@
import os
import re
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union
+from urllib.parse import urlparse
from swh.model import hashutil
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
@@ -248,6 +249,22 @@
# slash while the url in storage have it (e.g. Debian source package)
else:
origin_urls.append(f"{origin['url']}/")
+ try:
+ # handle case where the "://" character sequence was mangled into ":/"
+ parsed_url = urlparse(origin["url"])
+ if (
+ parsed_url.scheme
+ and not parsed_url.netloc
+ and origin["url"].startswith(f"{parsed_url.scheme}:/")
+ and not origin["url"].startswith(f"{parsed_url.scheme}://")
+ ):
+ origin_urls.append(
+ origin["url"].replace(
+ f"{parsed_url.scheme}:/", f"{parsed_url.scheme}://"
+ )
+ )
+ except Exception:
+ pass
origins = [o for o in storage.origin_get(origin_urls) if o is not None]
if not origins:
msg = "Origin with url %s not found!" % origin["url"]
diff --git a/swh/web/common/identifiers.py b/swh/web/common/identifiers.py
--- a/swh/web/common/identifiers.py
+++ b/swh/web/common/identifiers.py
@@ -116,7 +116,9 @@
query_dict[k] = query_params[k]
if "origin" in swhid_parsed.metadata:
- query_dict["origin_url"] = unquote(swhid_parsed.metadata["origin"])
+ origin_url = unquote(swhid_parsed.metadata["origin"])
+ origin_url = archive.lookup_origin({"url": origin_url})["url"]
+ query_dict["origin_url"] = origin_url
if "anchor" in swhid_parsed.metadata:
anchor_swhid_parsed = get_swhid(swhid_parsed.metadata["anchor"])
diff --git a/swh/web/tests/browse/views/test_identifiers.py b/swh/web/tests/browse/views/test_identifiers.py
--- a/swh/web/tests/browse/views/test_identifiers.py
+++ b/swh/web/tests/browse/views/test_identifiers.py
@@ -9,6 +9,7 @@
from hypothesis import given
from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
+from swh.model.model import Origin
from swh.web.common.identifiers import gen_swhid
from swh.web.common.utils import reverse
from swh.web.tests.django_asserts import assert_contains
@@ -123,9 +124,12 @@
@given(content())
-def test_content_id_optional_parts_browse(client, content):
+def test_content_id_optional_parts_browse(client, archive_data, content):
cnt_sha1_git = content["sha1_git"]
origin_url = "https://github.com/user/repo"
+
+ archive_data.origin_add([Origin(url=origin_url)])
+
swhid = gen_swhid(
CONTENT, cnt_sha1_git, metadata={"lines": "4-20", "origin": origin_url},
)
@@ -187,8 +191,9 @@
@given(directory())
-def test_browse_swhid_special_characters_escaping(client, directory):
+def test_browse_swhid_special_characters_escaping(client, archive_data, directory):
origin = "http://example.org/?project=abc;"
+ archive_data.origin_add([Origin(url=origin)])
origin_swhid_escaped = quote(origin, safe="/?:@&")
origin_swhid_url_escaped = quote(origin, safe="/:@;")
swhid = gen_swhid(DIRECTORY, directory, metadata={"origin": origin_swhid_escaped})
diff --git a/swh/web/tests/common/test_archive.py b/swh/web/tests/common/test_archive.py
--- a/swh/web/tests/common/test_archive.py
+++ b/swh/web/tests/common/test_archive.py
@@ -975,6 +975,14 @@
assert origin_info["url"] == deb_origin.url
+def test_lookup_origin_single_slash_after_protocol(archive_data):
+ origin_url = "http://snapshot.debian.org/package/r-base/"
+ malformed_origin_url = "http:/snapshot.debian.org/package/r-base/"
+ archive_data.origin_add([Origin(url=origin_url)])
+ origin_info = archive.lookup_origin({"url": malformed_origin_url})
+ assert origin_info["url"] == origin_url
+
+
@given(snapshot())
def test_lookup_snapshot_branch_name_from_tip_revision(archive_data, snapshot_id):
snapshot = archive_data.snapshot_get(snapshot_id)
diff --git a/swh/web/tests/common/test_identifiers.py b/swh/web/tests/common/test_identifiers.py
--- a/swh/web/tests/common/test_identifiers.py
+++ b/swh/web/tests/common/test_identifiers.py
@@ -19,6 +19,7 @@
SWHID,
parse_swhid,
)
+from swh.model.model import Origin
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.common.exc import BadInputExc
from swh.web.common.identifiers import (
@@ -637,3 +638,13 @@
query_params={"path": dir_subdir_path},
)
assert resolved_swhid["browse_url"] == browse_url
+
+
+@given(directory())
+def test_resolve_swhid_with_malformed_origin_url(archive_data, directory):
+ origin_url = "http://example.org/project/abc"
+ malformed_origin_url = "http:/example.org/project/abc"
+ archive_data.origin_add([Origin(url=origin_url)])
+ swhid = gen_swhid(DIRECTORY, directory, metadata={"origin": malformed_origin_url})
+ resolved_swhid = resolve_swhid(swhid)
+ assert origin_url in resolved_swhid["browse_url"]
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 6:04 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216556
Attached To
D4729: common/archive: Handle single slash after protocol in lookup_origin
Event Timeline
Log In to Comment