Page MenuHomeSoftware Heritage

D8618.id31120.diff
No OneTemporary

D8618.id31120.diff

diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -19,7 +19,8 @@
from swh.core.statsd import Statsd
from swh.core.tarball import uncompress
from swh.loader.core.metadata_fetchers import CredentialsType, get_fetchers_for_lister
-from swh.loader.exception import NotFound
+from swh.loader.core.utils import nix_store_check
+from swh.loader.exception import NotFound, UnsupportedChecksumComputation
from swh.loader.package.utils import download
from swh.model import from_disk
from swh.model.model import (
@@ -670,14 +671,26 @@
storage: StorageInterface,
url: str,
checksums: Dict[str, str],
+ checksums_computation: str = "standard",
fallback_urls: List[str] = None,
**kwargs,
):
super().__init__(storage, url, **kwargs)
self.snapshot: Optional[Snapshot] = None
self.checksums = checksums
+ self.checksums_computations = checksums_computation
+ if self.checksums_computations not in ("nar", "standard"):
+ raise UnsupportedChecksumComputation(
+ "Unsupported checksums computations: %s",
+ self.checksums_computations,
+ )
+
fallback_urls_ = fallback_urls or []
self.mirror_urls: List[str] = [self.origin.url, *fallback_urls_]
+ # Ensure the content received matches the "standard" checksums provided
+ self.standard_hashes = (
+ self.checksums if self.checksums_computations == "standard" else {}
+ )
def prepare(self) -> None:
self.last_snapshot = snapshot_get_latest(self.storage, self.origin.url)
@@ -727,7 +740,15 @@
)
try:
with tempfile.TemporaryDirectory() as tmpdir:
- file_path, _ = download(url, dest=tmpdir, hashes=self.checksums)
+ # the following includes the hash computation check
+ file_path, _ = download(
+ url, dest=tmpdir, hashes=self.standard_hashes
+ )
+ if self.checksums_computations == "nar":
+ # hashes are not "standard", so we need an extra check to happen
+ # on the file itself
+ nix_store_check(file_path, self.checksums)
+
with open(file_path, "rb") as file:
self.content = Content.from_data(file.read())
except HTTPError as http_error:
@@ -811,11 +832,11 @@
)
with tempfile.TemporaryDirectory() as tmpdir:
try:
+ # Ensure the content received matches the "standard" checksums provided
tarball_path, extrinsic_metadata = download(
url,
tmpdir,
- # Ensure content received matched the checksums received
- hashes=self.checksums,
+ hashes=self.standard_hashes,
extra_request_headers={"Accept-Encoding": "identity"},
)
except ValueError as e:
@@ -832,9 +853,17 @@
directory_path = os.path.join(tmpdir, "src")
os.makedirs(directory_path, exist_ok=True)
uncompress(tarball_path, dest=directory_path)
-
self.log.debug("uncompressed path to directory: %s", directory_path)
+ if self.checksums_computations == "nar":
+ # hashes are not "standard", so we need an extra check to happen
+ # on the uncompressed tarball
+ dir_to_check = os.path.join(
+ directory_path, os.listdir(directory_path)[0]
+ )
+ self.log.debug("Directory to check nar hashes: %s", dir_to_check)
+ nix_store_check(dir_to_check, self.checksums)
+
self.directory = from_disk.Directory.from_disk(
path=directory_path.encode("utf-8"),
max_content_length=self.max_content_size,
diff --git a/swh/loader/core/tests/conftest.py b/swh/loader/core/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/core/tests/conftest.py
@@ -0,0 +1,8 @@
+# Copyright (C) 2018-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import shutil
+
+nix_store_missing = shutil.which("nix-store") is None  # True if not found in PATH
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -35,6 +35,8 @@
)
import swh.storage.exc
+from .conftest import nix_store_missing
+
ORIGIN = Origin(url="some-url")
PARENT_ORIGIN = Origin(url="base-origin-url")
@@ -767,3 +769,29 @@
result2 = loader.load()
assert result2 == {"status": "uneventful"}
+
+
+@pytest.mark.skipif(nix_store_missing, reason="requires nix-bin installed (bullseye)")
+def test_directory_loader_ok_with_nar(swh_storage, requests_mock_datadir, tarball_path):
+ """Loading a tarball checked with nar hashes is eventful first, then uneventful"""
+ origin = Origin(DIRECTORY_URL)
+ loader = DirectoryLoader(
+ swh_storage,
+ origin.url,
+ checksums={
+ "sha256": "23fb1fe278aeb2de899f7d7f10cf892f63136cea2c07146da2200da4de54b7e4"
+ },
+ checksums_computation="nar",  # presumably the sha256 above is a nar hash
+ )
+ result = loader.load()
+
+ assert result == {"status": "eventful"}
+
+ visit_status = assert_last_visit_matches(
+ swh_storage, origin.url, status="full", type="directory"
+ )
+ assert visit_status.snapshot is not None
+
+ result2 = loader.load()  # second load with the same loader instance
+
+ assert result2 == {"status": "uneventful"}
diff --git a/swh/loader/core/utils.py b/swh/loader/core/utils.py
--- a/swh/loader/core/utils.py
+++ b/swh/loader/core/utils.py
@@ -9,14 +9,18 @@
import os
import shutil
import signal
+from subprocess import PIPE, Popen
import time
import traceback
-from typing import Callable, Optional, Union
+from typing import Callable, Dict, Optional, Union
from billiard import Process, Queue # type: ignore
from dateutil.parser import parse
import psutil
+from swh.loader.exception import MissingOptionalDependency
+from swh.model.hashutil import MultiHash
+
def clean_dangling_folders(dirpath: str, pattern_check: str, log=None) -> None:
"""Clean up potential dangling temporary working folder rooted at `dirpath`. Those
@@ -125,3 +129,46 @@
return parse(visit_date)
raise ValueError(f"invalid visit date {visit_date!r}")
+
+
+def nix_store_check(filepath: str, checksums: Dict[str, str]) -> None:
+    """Check that the nar hashes of ``filepath`` match the expected ``checksums``.
+
+    The path is serialized with ``nix-store --dump`` and the hashes named by the
+    keys of ``checksums`` are computed over the command's standard output.
+
+    Args:
+        filepath: Path of the file or directory to check
+        checksums: Mapping of hash algorithm name to expected hex digest
+
+    Raises:
+        MissingOptionalDependency: if the ``nix-store`` executable is not found
+        ValueError: if ``nix-store`` fails or the computed hashes differ from
+            ``checksums``
+
+    """
+    h = MultiHash(hash_names=checksums.keys())
+    command = ["nix-store", "--dump", filepath]
+
+    try:
+        with Popen(command, stdout=PIPE) as proc:
+            assert proc.stdout is not None  # for type checkers: stdout=PIPE is set
+            for chunk in proc.stdout:
+                h.update(chunk)
+    except FileNotFoundError:
+        raise MissingOptionalDependency("nix-store")
+
+    # Leaving the ``with`` block waits for the process, so returncode is set here
+    if proc.returncode != 0:
+        raise ValueError(
+            f"{' '.join(command)!r} failed with exit code {proc.returncode}"
+        )
+
+    actual_hashes = h.hexdigest()
+    # Explicit raise rather than ``assert``: asserts are stripped under ``python -O``
+    # and a mismatch must never go unnoticed
+    if actual_hashes != checksums:
+        raise ValueError(
+            f"Checksum mismatch on {filepath}: "
+            f"expected {checksums}, got {actual_hashes}"
+        )
diff --git a/swh/loader/exception.py b/swh/loader/exception.py
--- a/swh/loader/exception.py
+++ b/swh/loader/exception.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,3 +11,15 @@
"""
pass
+
+
+class MissingOptionalDependency(ValueError):
+ """Raised when an optional runtime dependency (e.g. nix-store) is missing."""
+
+ pass
+
+
+class UnsupportedChecksumComputation(ValueError):
+ """Raised when the requested checksums computation is not supported."""
+
+ pass

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:48 AM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219556

Event Timeline