diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -242,12 +242,30 @@ url, ) + origin = urls[0] + content_type = response.headers.get("Content-Type") if content_type: logger.debug("Content-Type: %s", content_type) if content_type == "application/json": - return False, urls[0] - return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), urls[0] + return False, origin + return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin + + content_disposition = response.headers.get("Content-Disposition") + if content_disposition: + logger.debug("Content-Disposition: %s", content_disposition) + content_disposition_type, filename_type = content_disposition.split("; ") + assert content_disposition_type == "attachment" + _, filename = filename_type.split("=") + + return ( + url_endswith( + urlparse(filename), + TARBALL_EXTENSIONS, + raise_when_no_extension=False, + ), + origin, + ) raise ArtifactNatureUndetected( f"Cannot determine artifact type from url <{url}>" diff --git a/swh/lister/nixguix/tests/data/sources-success.json b/swh/lister/nixguix/tests/data/sources-success.json --- a/swh/lister/nixguix/tests/data/sources-success.json +++ b/swh/lister/nixguix/tests/data/sources-success.json @@ -272,6 +272,13 @@ "https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5" ], "integrity": "sha256-Kb5f9LN54vxPiO99i8FyNCEw3T53owYfZMinXv5OunM=" + }, + { + "type": "url", + "urls": [ + "https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1" + ], + "integrity": "sha256-pBf9PTQiEv0ZDk8hvoLvE8EOHtfCiPu+RuRiAM895Ng=" } ], "version": "1", diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -238,6 +238,12 @@ "Content-Type": "application/x-gzip", }, ) + requests_mock.head( + "https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1", + headers={ + "Content-Disposition": "attachment; filename=unknown-horizons-2019.1.tar.gz", + }, + ) expected_visit_types = defaultdict(int) # origin upstream is added as origin