Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123501
D8773.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D8773.diff
View Options
diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py
--- a/swh/lister/nixguix/lister.py
+++ b/swh/lister/nixguix/lister.py
@@ -22,6 +22,7 @@
import logging
from pathlib import Path
import random
+import re
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
from urllib.parse import parse_qsl, urlparse
@@ -136,20 +137,36 @@
POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys())
+PATTERN_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+")
+
+
def url_endswith(
urlparsed, extensions: List[str], raise_when_no_extension: bool = True
) -> bool:
- """Determine whether urlparsed ends with one of the extensions.
+ """Determine whether urlparsed ends with one of the extensions passed as parameter.
+
+ This also accounts for the edge case of a filename with only a version as name (so no
+ extension at the end).
Raises:
- ArtifactWithoutExtension in case no extension is available and raise_when_no_extension
- is True (the default)
+ ArtifactWithoutExtension in case no extension is available and
+ raise_when_no_extension is True (the default)
"""
paths = [Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)]
if raise_when_no_extension and not any(path.suffix != "" for path in paths):
raise ArtifactWithoutExtension
- return any(path.suffix.endswith(tuple(extensions)) for path in paths)
+ match = any(path.suffix.endswith(tuple(extensions)) for path in paths)
+ if match:
+ return match
+ # Some false negatives can happen (e.g. https://<netloc>/path/0.1.5), so make sure
+ # to catch those
+ name = Path(urlparsed.path).name
+ if not PATTERN_VERSION.match(name):
+ return match
+ if raise_when_no_extension:
+ raise ArtifactWithoutExtension
+ return False
def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]:
diff --git a/swh/lister/nixguix/tests/data/sources-success.json b/swh/lister/nixguix/tests/data/sources-success.json
--- a/swh/lister/nixguix/tests/data/sources-success.json
+++ b/swh/lister/nixguix/tests/data/sources-success.json
@@ -265,6 +265,13 @@
"https://github.com/Doom-Utils/deutex/releases/download/v5.2.2/deutex-5.2.2.tar.zst"
],
"integrity": "sha256-EO0OelM+yXy20DVI1CWPvsiIUqRbXqTPVDQ3atQXS18="
+ },
+ {
+ "type": "url",
+ "urls": [
+ "https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5"
+ ],
+ "integrity": "sha256-Kb5f9LN54vxPiO99i8FyNCEw3T53owYfZMinXv5OunM="
}
],
"version": "1",
diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py
--- a/swh/lister/nixguix/tests/test_lister.py
+++ b/swh/lister/nixguix/tests/test_lister.py
@@ -52,7 +52,13 @@
[(f"one.{ext}", True) for ext in TARBALL_EXTENSIONS]
+ [(f"one.{ext}?foo=bar", True) for ext in TARBALL_EXTENSIONS]
+ [(f"one?p0=1&foo=bar.{ext}", True) for ext in DEFAULT_EXTENSIONS_TO_IGNORE]
- + [("two?file=something.el", False), ("foo?two=two&three=three", False)],
+ + [
+ ("two?file=something.el", False),
+ ("foo?two=two&three=three", False),
+ ("v1.2.3", False), # when raise_when_no_extension is False
+ ("2048-game-20151026.1233", False),
+ ("v2048-game-20151026.1233", False),
+ ],
)
def test_url_endswith(name, expected_result):
"""It should detect whether url or query params of the urls ends with extensions"""
@@ -67,9 +73,12 @@
)
-def test_url_endswith_raise():
+@pytest.mark.parametrize(
+ "name", ["foo?two=two&three=three", "tar.gz/0.1.5", "tar.gz/v10.3.1"]
+)
+def test_url_endswith_raise(name):
"""It should raise when the tested url has no extension"""
- urlparsed = urlparse("https://example.org/foo?two=two&three=three")
+ urlparsed = urlparse(f"https://example.org/{name}")
with pytest.raises(ArtifactWithoutExtension):
url_endswith(urlparsed, ["unimportant"])
@@ -225,6 +234,12 @@
"Location": "https://static.crates.io/crates/syntect/syntect-4.6.0.crate"
},
)
+ requests_mock.head(
+ "https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5",
+ headers={
+ "Content-Type": "application/x-gzip",
+ },
+ )
expected_visit_types = defaultdict(int)
# origin upstream is added as origin
@@ -248,7 +263,7 @@
expected_visit_types["content"] += 1
elif url.startswith("svn"): # mistyped artifact rendered as vcs nonetheless
expected_visit_types["svn"] += 1
- elif "crates.io" in url:
+ elif "crates.io" in url or "codeload.github.com" in url:
expected_visit_types["directory"] += 1
else: # tarball artifacts
expected_visit_types["directory"] += 1
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 9:46 AM (15 h, 31 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216411
Attached To
D8773: nixguix: Deal with edge case url with version instead of extension
Event Timeline
Log In to Comment