Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343133
D5035.id18042.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D5035.id18042.diff
View Options
diff --git a/swh/loader/exception.py b/swh/loader/exception.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/exception.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class NotFound(ValueError):
+ """Exception raised when the information to retrieve cannot be found (e.g. an
+ origin, an artifact, ...).
+
+ """
+
+ pass
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -29,6 +29,7 @@
from swh.core.config import load_from_envvar
from swh.core.tarball import uncompress
+from swh.loader.exception import NotFound
from swh.loader.package.utils import download
from swh.model import from_disk
from swh.model.collections import ImmutableDict
@@ -157,6 +158,10 @@
def get_versions(self) -> Sequence[str]:
"""Return the list of all published package versions.
+ Raises:
+ :class:`swh.loader.exception.NotFound` when the published package
+ versions cannot be read.
+
Returns:
Sequence of published versions
@@ -413,7 +418,18 @@
load_exceptions: List[Exception] = []
- for version in self.get_versions(): # for each
+ try:
+ versions = self.get_versions()
+ except NotFound:
+ status_visit = "not_found"
+ status_load = "failed"
+ return finalize_visit()
+ except Exception:
+ status_visit = "failed"
+ status_load = "failed"
+ return finalize_visit()
+
+ for version in versions:
logger.debug("version: %s", version)
tmp_revisions[version] = []
# `p_` stands for `package_`
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -16,7 +16,7 @@
PackageLoader,
RawExtrinsicMetadataCore,
)
-from swh.loader.package.utils import EMPTY_AUTHOR, api_info
+from swh.loader.package.utils import EMPTY_AUTHOR, api_info, cached_method
from swh.model import hashutil
from swh.model.collections import ImmutableDict
from swh.model.model import (
@@ -60,29 +60,31 @@
def __init__(self, url):
super().__init__(url=url)
- unsupported_file_extensions = self.config.get("unsupported_file_extensions", [])
- self.raw_sources = retrieve_sources(url)
- clean = clean_sources(
- parse_sources(self.raw_sources), unsupported_file_extensions
- )
- self.sources = clean["sources"]
self.provider_url = url
- self._integrityByUrl = {s["urls"][0]: s["integrity"] for s in self.sources}
-
- # The revision used to create the sources.json file. For Nix,
- # this revision belongs to the github.com/nixos/nixpkgs
- # repository
- self.revision = clean["revision"]
-
# Note: this could be renamed get_artifacts in the PackageLoader
# base class.
- def get_versions(self):
+ @cached_method
+ def raw_sources(self):
+ return retrieve_sources(self.url)
+
+ @cached_method
+ def supported_sources(self):
+ raw_sources = self.raw_sources()
+ unsupported_file_extensions = self.config.get("unsupported_file_extensions", [])
+ return clean_sources(parse_sources(raw_sources), unsupported_file_extensions)
+
+ @cached_method
+ def integrity_by_url(self) -> Dict[str, Any]:
+ sources = self.supported_sources()
+ return {s["urls"][0]: s["integrity"] for s in sources["sources"]}
+
+ def get_versions(self) -> List[str]:
"""The first mirror of the mirror list is used as branch name in the
snapshot.
"""
- return self._integrityByUrl.keys()
+ return list(self.integrity_by_url().keys())
def get_metadata_authority(self):
return MetadataAuthority(
@@ -92,7 +94,7 @@
def get_extrinsic_snapshot_metadata(self):
return [
RawExtrinsicMetadataCore(
- format="nixguix-sources-json", metadata=self.raw_sources,
+ format="nixguix-sources-json", metadata=self.raw_sources(),
),
]
@@ -103,7 +105,7 @@
# can be fetched from several urls, called mirrors. We
# currently only use the first one, but if the first one
# fails, we should try the second one and so on.
- integrity = self._integrityByUrl[url]
+ integrity = self.integrity_by_url()[url]
p_info = NixGuixPackageInfo.from_metadata({"url": url, "integrity": integrity})
yield url, p_info
@@ -178,10 +180,14 @@
a Nix/Guix evaluation.
"""
+ # The revision used to create the sources.json file. For Nix,
+ # this revision belongs to the github.com/nixos/nixpkgs
+ # repository
+ revision = self.supported_sources()["revision"]
return {
b"evaluation": {
"target_type": "revision",
- "target": hashutil.hash_to_bytes(self.revision),
+ "target": hashutil.hash_to_bytes(revision),
}
}
@@ -209,6 +215,7 @@
def retrieve_sources(url: str) -> bytes:
+ """Retrieve sources. Potentially raise NotFound error."""
return api_info(url, allow_redirects=True)
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -1,10 +1,9 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
-from json.decoder import JSONDecodeError
import logging
import os
from typing import Dict, Optional, Tuple
@@ -107,14 +106,33 @@
assert len(j["sources"]) == 2
-def test_retrieve_non_existing(swh_config, requests_mock_datadir):
- with pytest.raises(ValueError):
- NixGuixLoader("https://non-existing-url")
+def test_nixguix_url_not_found(swh_config, requests_mock_datadir):
+ """When failing to read from the url, the visit is marked as not_found.
+
+ """
+ unknown_url = "https://non-existing-url"
+ loader = NixGuixLoader(unknown_url)
+ # the sources are only fetched during load(), so the failure is reported there
+ load_status = loader.load()
+
+ assert load_status == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, unknown_url, status="not_found", type="nixguix", snapshot=None
+ )
-def test_retrieve_non_json(swh_config, requests_mock_datadir):
- with pytest.raises(JSONDecodeError):
- NixGuixLoader("https://example.com/file.txt")
+def test_nixguix_url_with_decoding_error(swh_config, requests_mock_datadir):
+ """On any other error while reading the url, the visit is marked as failed"""
+ sources_url = "https://example.com/file.txt"
+ loader = NixGuixLoader(sources_url)
+ load_status = loader.load()
+
+ assert load_status == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, sources_url, status="failed", type="nixguix", snapshot=None
+ )
def test_clean_sources_invalid_schema(swh_config, requests_mock_datadir):
@@ -304,7 +322,8 @@
loader = NixGuixLoader(sources_url)
loader_status = loader.load()
- urls = [s["urls"][0] for s in loader.sources]
+ sources = loader.supported_sources()["sources"]
+ urls = [s["urls"][0] for s in sources]
assert "https://example.com/file.txt" in urls
assert loader_status["status"] == "eventful"
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -701,3 +701,14 @@
}
assert_last_visit_matches(loader.storage, url, status="failed", type="npm")
+
+
+def test_npm_origin_not_found(swh_config, requests_mock_datadir):
+ url = package_url("non-existent-url")
+ loader = NpmLoader(url)
+
+ assert loader.load() == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, url, status="not_found", type="npm", snapshot=None
+ )
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -904,3 +904,14 @@
assert_last_visit_matches(
loader.storage, url, status="full", type="pypi", snapshot=expected_snapshot.id
)
+
+
+def test_pypi_origin_not_found(swh_config, requests_mock_datadir):
+ url = "https://pypi.org/project/unknown"
+ loader = PyPILoader(url)
+
+ assert loader.load() == {"status": "failed"}
+
+ assert_last_visit_matches(
+ loader.storage, url, status="not_found", type="pypi", snapshot=None
+ )
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,6 +9,7 @@
import pytest
+from swh.loader.exception import NotFound
import swh.loader.package
from swh.loader.package.utils import api_info, download, release_name
@@ -135,7 +136,7 @@
status_code = 400
requests_mock.get(url, status_code=status_code)
- with pytest.raises(ValueError) as e0:
+ with pytest.raises(NotFound) as e0:
api_info(url)
assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code)
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,6 +11,7 @@
import requests
+from swh.loader.exception import NotFound
from swh.loader.package import DEFAULT_PARAMS
from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash
from swh.model.model import Person
@@ -32,7 +33,7 @@
url (str): The api url (e.g PyPI, npm, etc...)
Raises:
- ValueError in case of query failures (for some reasons: 404, ...)
+ NotFound when the query fails (any non-200 status code, e.g. 404)
Returns:
The associated response's information
@@ -40,7 +41,7 @@
"""
response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
if response.status_code != 200:
- raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code))
+ raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
return response.content
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 7:35 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227667
Attached To
D5035: package: Mark visit as not_found when relevant
Event Timeline
Log In to Comment