D8618: DirectoryLoader: Check nar hashes when provided
File: D8618.id31120.diff (7 KB, text/plain)
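For context, a "nar" checksum is not computed over the raw bytes of the fetched file or tarball: it is computed over the Nix archive (NAR) serialization that `nix-store --dump` emits for a path, which is why the loader shells out to `nix-store` in the `nix_store_check` helper added by this diff. The standalone sketch below shows one way such an expected hash could be produced ahead of time; the `nar_sha256` helper is hypothetical, assumes the `nix-store` binary is on PATH, and is not part of this diff.

# Hypothetical helper (not part of this diff): compute the hex sha256 of the
# NAR serialization of a path, assuming nix-store is installed and on PATH.
import hashlib
import subprocess


def nar_sha256(path: str) -> str:
    h = hashlib.sha256()
    # nix-store --dump writes the NAR serialization of `path` to stdout
    with subprocess.Popen(["nix-store", "--dump", path], stdout=subprocess.PIPE) as proc:
        assert proc.stdout is not None
        for chunk in proc.stdout:
            h.update(chunk)
    return h.hexdigest()


# e.g. checksums={"sha256": nar_sha256("./src")} together with
# checksums_computation="nar"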
diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -19,7 +19,8 @@
 from swh.core.statsd import Statsd
 from swh.core.tarball import uncompress
 from swh.loader.core.metadata_fetchers import CredentialsType, get_fetchers_for_lister
-from swh.loader.exception import NotFound
+from swh.loader.core.utils import nix_store_check
+from swh.loader.exception import NotFound, UnsupportedChecksumComputation
 from swh.loader.package.utils import download
 from swh.model import from_disk
 from swh.model.model import (
@@ -670,14 +671,26 @@
         storage: StorageInterface,
         url: str,
         checksums: Dict[str, str],
+        checksums_computation: str = "standard",
         fallback_urls: List[str] = None,
         **kwargs,
     ):
         super().__init__(storage, url, **kwargs)
         self.snapshot: Optional[Snapshot] = None
         self.checksums = checksums
+        self.checksums_computations = checksums_computation
+        if self.checksums_computations not in ("nar", "standard"):
+            raise UnsupportedChecksumComputation(
+                "Unsupported checksums computations: %s",
+                self.checksums_computations,
+            )
+
         fallback_urls_ = fallback_urls or []
         self.mirror_urls: List[str] = [self.origin.url, *fallback_urls_]
+        # Ensure content received matched the "standard" checksums received
+        self.standard_hashes = (
+            self.checksums if self.checksums_computations == "standard" else {}
+        )
 
     def prepare(self) -> None:
         self.last_snapshot = snapshot_get_latest(self.storage, self.origin.url)
@@ -727,7 +740,15 @@
             )
             try:
                 with tempfile.TemporaryDirectory() as tmpdir:
-                    file_path, _ = download(url, dest=tmpdir, hashes=self.checksums)
+                    # the following includes the hash computation check
+                    file_path, _ = download(
+                        url, dest=tmpdir, hashes=self.standard_hashes
+                    )
+                    if self.checksums_computations == "nar":
+                        # hashes are not "standard", so we need an extra check to happen
+                        # on the file itself
+                        nix_store_check(file_path, self.checksums)
+
                     with open(file_path, "rb") as file:
                         self.content = Content.from_data(file.read())
             except HTTPError as http_error:
@@ -811,11 +832,11 @@
             )
             with tempfile.TemporaryDirectory() as tmpdir:
                 try:
+                    # Ensure content received matched the "standard" checksums received
                     tarball_path, extrinsic_metadata = download(
                         url,
                         tmpdir,
-                        # Ensure content received matched the checksums received
-                        hashes=self.checksums,
+                        hashes=self.standard_hashes,
                         extra_request_headers={"Accept-Encoding": "identity"},
                     )
                 except ValueError as e:
@@ -832,9 +853,17 @@
                 directory_path = os.path.join(tmpdir, "src")
                 os.makedirs(directory_path, exist_ok=True)
                 uncompress(tarball_path, dest=directory_path)
-
                 self.log.debug("uncompressed path to directory: %s", directory_path)
+                if self.checksums_computations == "nar":
+                    # hashes are not "standard", so we need an extra check to happen
+                    # on the uncompressed tarball
+                    dir_to_check = os.path.join(
+                        directory_path, os.listdir(directory_path)[0]
+                    )
+                    self.log.debug("Directory to check nar hashes: %s", dir_to_check)
+                    nix_store_check(dir_to_check, self.checksums)
+
                 self.directory = from_disk.Directory.from_disk(
                     path=directory_path.encode("utf-8"),
                     max_content_length=self.max_content_size,
                 )
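To summarize the loader.py changes above: with the default "standard" computation the expected checksums are handed to download() and verified there exactly as before, whereas with "nar" they are withheld from download() (self.standard_hashes is empty) and verified afterwards by nix_store_check, on the downloaded file for the content loader and on the uncompressed top-level directory for the directory loader. A rough instantiation sketch, not part of the diff, with placeholder URL and digest:

# Sketch only; the URL and digest below are placeholders.
loader = DirectoryLoader(
    storage,
    "https://example.org/project-1.0.0.tar.gz",
    # NAR sha256 of the uncompressed source tree, not of the tarball bytes
    checksums={"sha256": "0" * 64},
    checksums_computation="nar",  # defaults to "standard"
)
result = loader.load()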
diff --git a/swh/loader/core/tests/conftest.py b/swh/loader/core/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/core/tests/conftest.py
@@ -0,0 +1,8 @@
+# Copyright (C) 2018-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import shutil
+
+nix_store_missing = shutil.which("nix-store") is None
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -35,6 +35,8 @@
 )
 import swh.storage.exc
 
+from .conftest import nix_store_missing
+
 
 ORIGIN = Origin(url="some-url")
 PARENT_ORIGIN = Origin(url="base-origin-url")
@@ -767,3 +769,29 @@
     result2 = loader.load()
 
     assert result2 == {"status": "uneventful"}
+
+
+@pytest.mark.skipif(nix_store_missing, reason="requires nix-bin installed (bullseye)")
+def test_directory_loader_ok_with_nar(swh_storage, requests_mock_datadir, tarball_path):
+    """It should be an eventful visit on a tarball with nar hashes, then uneventful"""
+    origin = Origin(DIRECTORY_URL)
+    loader = DirectoryLoader(
+        swh_storage,
+        origin.url,
+        checksums={
+            "sha256": "23fb1fe278aeb2de899f7d7f10cf892f63136cea2c07146da2200da4de54b7e4"
+        },
+        checksums_computation="nar",
+    )
+    result = loader.load()
+
+    assert result == {"status": "eventful"}
+
+    visit_status = assert_last_visit_matches(
+        swh_storage, origin.url, status="full", type="directory"
+    )
+    assert visit_status.snapshot is not None
+
+    result2 = loader.load()
+
+    assert result2 == {"status": "uneventful"}
diff --git a/swh/loader/core/utils.py b/swh/loader/core/utils.py
--- a/swh/loader/core/utils.py
+++ b/swh/loader/core/utils.py
@@ -9,14 +9,18 @@
 import os
 import shutil
 import signal
+from subprocess import PIPE, Popen
 import time
 import traceback
-from typing import Callable, Optional, Union
+from typing import Callable, Dict, Optional, Union
 
 from billiard import Process, Queue  # type: ignore
 from dateutil.parser import parse
 import psutil
 
+from swh.loader.exception import MissingOptionalDependency
+from swh.model.hashutil import MultiHash
+
 
 def clean_dangling_folders(dirpath: str, pattern_check: str, log=None) -> None:
     """Clean up potential dangling temporary working folder rooted at `dirpath`. Those
@@ -125,3 +129,20 @@
         return parse(visit_date)
 
     raise ValueError(f"invalid visit date {visit_date!r}")
+
+
+def nix_store_check(filepath: str, checksums: Dict[str, str]):
+    h = MultiHash(hash_names=checksums.keys())
+
+    try:
+        command = ["nix-store", "--dump", filepath]
+        with Popen(command, stdout=PIPE) as proc:
+            assert proc.stdout is not None
+            for chunk in proc.stdout:
+                h.update(chunk)
+
+        actual_hashes = h.hexdigest()
+        assert actual_hashes == checksums
+
+    except FileNotFoundError:
+        raise MissingOptionalDependency("nix-store")
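A usage sketch for the nix_store_check helper added above (path and digest are placeholders): it streams the `nix-store --dump` output through MultiHash and compares the resulting hex digests with the expected mapping, so a mismatch surfaces as an AssertionError, while a missing nix-store binary surfaces as MissingOptionalDependency.

# Sketch only; the path and digest below are placeholders.
from swh.loader.core.utils import nix_store_check
from swh.loader.exception import MissingOptionalDependency

expected = {"sha256": "0" * 64}

try:
    nix_store_check("/tmp/extracted/source-tree", expected)  # returns None on a match
except MissingOptionalDependency:
    print("nix-store is not installed, nar checksums cannot be verified")
except AssertionError:
    print("nar checksum mismatch")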
diff --git a/swh/loader/exception.py b/swh/loader/exception.py
--- a/swh/loader/exception.py
+++ b/swh/loader/exception.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -11,3 +11,15 @@
     """
 
     pass
+
+
+class MissingOptionalDependency(ValueError):
+    """An exception raised when an optional runtime dependency is missing."""
+
+    pass
+
+
+class UnsupportedChecksumComputation(ValueError):
+    """An exception raised when the loader cannot compute such checksums."""
+
+    pass