diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -242,12 +242,33 @@ url, ) + origin = urls[0] + content_type = response.headers.get("Content-Type") if content_type: logger.debug("Content-Type: %s", content_type) if content_type == "application/json": - return False, urls[0] - return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), urls[0] + return False, origin + return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin + + content_disposition = response.headers.get("Content-Disposition") + if content_disposition: + logger.debug("Content-Disposition: %s", content_disposition) + if "filename=" in content_disposition: + fields = content_disposition.split("; ") + for field in fields: + if "filename=" in field: + _, filename = field.split("filename=") + break + + return ( + url_endswith( + urlparse(filename), + TARBALL_EXTENSIONS, + raise_when_no_extension=False, + ), + origin, + ) raise ArtifactNatureUndetected( f"Cannot determine artifact type from url <{url}>" diff --git a/swh/lister/nixguix/tests/data/sources-success.json b/swh/lister/nixguix/tests/data/sources-success.json --- a/swh/lister/nixguix/tests/data/sources-success.json +++ b/swh/lister/nixguix/tests/data/sources-success.json @@ -272,6 +272,20 @@ "https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5" ], "integrity": "sha256-Kb5f9LN54vxPiO99i8FyNCEw3T53owYfZMinXv5OunM=" + }, + { + "type": "url", + "urls": [ + "https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1" + ], + "integrity": "sha256-pBf9PTQiEv0ZDk8hvoLvE8EOHtfCiPu+RuRiAM895Ng=" + }, + { + "type": "url", + "urls": [ + "https://codeload.github.com/fifengine/fifengine/tar.gz/0.4.2" + ], + "integrity": "sha256-6IK1W++jauLxqJraFq8PgUobePfL5gIexbFgVgTPj/g=" } ], "version": "1", diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -240,6 +240,19 @@ "Content-Type": "application/x-gzip", }, ) + requests_mock.head( + "https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1", + headers={ + "Content-Disposition": "attachment; filename=unknown-horizons-2019.1.tar.gz", + }, + ) + requests_mock.head( + "https://codeload.github.com/fifengine/fifengine/tar.gz/0.4.2", + headers={ + "Content-Disposition": "attachment; name=fieldName; " + "filename=fifengine-0.4.2.tar.gz; other=stuff", + }, + ) expected_visit_types = defaultdict(int) # origin upstream is added as origin