Page MenuHomeSoftware Heritage

D5035.id18042.diff
No OneTemporary

D5035.id18042.diff

diff --git a/swh/loader/exception.py b/swh/loader/exception.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/exception.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class NotFound(ValueError):
+ """An exception raised when some information to retrieve is not found (e.g. origin,
+ artifact, ...)
+
+ """
+
+ pass
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -29,6 +29,7 @@
from swh.core.config import load_from_envvar
from swh.core.tarball import uncompress
+from swh.loader.exception import NotFound
from swh.loader.package.utils import download
from swh.model import from_disk
from swh.model.collections import ImmutableDict
@@ -157,6 +158,10 @@
def get_versions(self) -> Sequence[str]:
"""Return the list of all published package versions.
+ Raises:
+ :class:`swh.loader.exception.NotFound` error when failing to read the
+ published package versions.
+
Returns:
Sequence of published versions
@@ -413,7 +418,18 @@
load_exceptions: List[Exception] = []
- for version in self.get_versions(): # for each
+ try:
+ versions = self.get_versions()
+ except NotFound:
+ status_visit = "not_found"
+ status_load = "failed"
+ return finalize_visit()
+ except Exception:
+ status_visit = "failed"
+ status_load = "failed"
+ return finalize_visit()
+
+ for version in versions:
logger.debug("version: %s", version)
tmp_revisions[version] = []
# `p_` stands for `package_`
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -16,7 +16,7 @@
PackageLoader,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info
+from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method
from swh.model import hashutil
from swh.model.collections import ImmutableDict
from swh.model.model import (
@@ -60,29 +60,31 @@
def __init__(self, url):
super().__init__(url=url)
- unsupported_file_extensions = self.config.get("unsupported_file_extensions", [])
- self.raw_sources = retrieve_sources(url)
- clean = clean_sources(
- parse_sources(self.raw_sources), unsupported_file_extensions
- )
- self.sources = clean["sources"]
self.provider_url = url
- self._integrityByUrl = {s["urls"][0]: s["integrity"] for s in self.sources}
-
- # The revision used to create the sources.json file. For Nix,
- # this revision belongs to the github.com/nixos/nixpkgs
- # repository
- self.revision = clean["revision"]
-
# Note: this could be renamed get_artifacts in the PackageLoader
# base class.
- def get_versions(self):
+ @cached_method
+ def raw_sources(self):
+ return retrieve_sources(self.url)
+
+ @cached_method
+ def supported_sources(self):
+ raw_sources = self.raw_sources()
+ unsupported_file_extensions = self.config.get("unsupported_file_extensions", [])
+ return clean_sources(parse_sources(raw_sources), unsupported_file_extensions)
+
+ @cached_method
+ def integrity_by_url(self) -> Dict[str, Any]:
+ sources = self.supported_sources()
+ return {s["urls"][0]: s["integrity"] for s in sources["sources"]}
+
+ def get_versions(self) -> List[str]:
"""The first mirror of the mirror list is used as branch name in the
snapshot.
"""
- return self._integrityByUrl.keys()
+ return list(self.integrity_by_url().keys())
def get_metadata_authority(self):
return MetadataAuthority(
@@ -92,7 +94,7 @@
def get_extrinsic_snapshot_metadata(self):
return [
RawExtrinsicMetadataCore(
- format="nixguix-sources-json", metadata=self.raw_sources,
+ format="nixguix-sources-json", metadata=self.raw_sources(),
),
]
@@ -103,7 +105,7 @@
# can be fetched from several urls, called mirrors. We
# currently only use the first one, but if the first one
# fails, we should try the second one and so on.
- integrity = self._integrityByUrl[url]
+ integrity = self.integrity_by_url()[url]
p_info = NixGuixPackageInfo.from_metadata({"url": url, "integrity": integrity})
yield url, p_info
@@ -178,10 +180,14 @@
a Nix/Guix evaluation.
"""
+ # The revision used to create the sources.json file. For Nix,
+ # this revision belongs to the github.com/nixos/nixpkgs
+ # repository
+ revision = self.supported_sources()["revision"]
return {
b"evaluation": {
"target_type": "revision",
- "target": hashutil.hash_to_bytes(self.revision),
+ "target": hashutil.hash_to_bytes(revision),
}
}
@@ -209,6 +215,7 @@
def retrieve_sources(url: str) -> bytes:
+ """Retrieve sources. Potentially raise NotFound error."""
return api_info(url, allow_redirects=True)
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -1,10 +1,9 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
-from json.decoder import JSONDecodeError
import logging
import os
from typing import Dict, Optional, Tuple
@@ -107,14 +106,33 @@
assert len(j["sources"]) == 2
-def test_retrieve_non_existing(swh_config, requests_mock_datadir):
- with pytest.raises(ValueError):
- NixGuixLoader("https://non-existing-url")
+def test_nixguix_url_not_found(swh_config, requests_mock_datadir):
+ """When failing to read from the url, the visit is marked as not_found.
+
+ """
+ unknown_url = "https://non-existing-url"
+ loader = NixGuixLoader(unknown_url)
+ # during the retrieval step
+ load_status = loader.load()
+
+ assert load_status == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, unknown_url, status="not_found", type="nixguix", snapshot=None
+ )
-def test_retrieve_non_json(swh_config, requests_mock_datadir):
- with pytest.raises(JSONDecodeError):
- NixGuixLoader("https://example.com/file.txt")
+def test_nixguix_url_with_decoding_error(swh_config, requests_mock_datadir):
+ """On any other error while reading from the url, the visit is marked as failed."""
+ sources_url = "https://example.com/file.txt"
+ loader = NixGuixLoader(sources_url)
+ load_status = loader.load()
+
+ assert load_status == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, sources_url, status="failed", type="nixguix", snapshot=None
+ )
def test_clean_sources_invalid_schema(swh_config, requests_mock_datadir):
@@ -304,7 +322,8 @@
loader = NixGuixLoader(sources_url)
loader_status = loader.load()
- urls = [s["urls"][0] for s in loader.sources]
+ sources = loader.supported_sources()["sources"]
+ urls = [s["urls"][0] for s in sources]
assert "https://example.com/file.txt" in urls
assert loader_status["status"] == "eventful"
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -701,3 +701,14 @@
}
assert_last_visit_matches(loader.storage, url, status="failed", type="npm")
+
+
+def test_npm_origin_not_found(swh_config, requests_mock_datadir):
+ url = package_url("non-existent-url")
+ loader = NpmLoader(url)
+
+ assert loader.load() == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, url, status="not_found", type="npm", snapshot=None
+ )
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -904,3 +904,14 @@
assert_last_visit_matches(
loader.storage, url, status="full", type="pypi", snapshot=expected_snapshot.id
)
+
+
+def test_pypi_origin_not_found(swh_config, requests_mock_datadir):
+ url = "https://pypi.org/project/unknown"
+ loader = PyPILoader(url)
+
+ assert loader.load() == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, url, status="not_found", type="pypi", snapshot=None
+ )
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,6 +9,7 @@
import pytest
+from swh.loader.exception import NotFound
import swh.loader.package
from swh.loader.package.utils import api_info, download, release_name
@@ -135,7 +136,7 @@
status_code = 400
requests_mock.get(url, status_code=status_code)
- with pytest.raises(ValueError) as e0:
+ with pytest.raises(NotFound) as e0:
api_info(url)
assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code)
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,6 +11,7 @@
import requests
+from swh.loader.exception import NotFound
from swh.loader.package import DEFAULT_PARAMS
from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash
from swh.model.model import Person
@@ -32,7 +33,7 @@
url (str): The api url (e.g PyPI, npm, etc...)
Raises:
- ValueError in case of query failures (for some reasons: 404, ...)
+ NotFound in case of query failure (any non-200 response: 404, ...)
Returns:
The associated response's information
@@ -40,7 +41,7 @@
"""
response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
if response.status_code != 200:
- raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code))
+ raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
return response.content

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 7:35 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227667

Event Timeline