diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -28,7 +28,7 @@ PackageLoader, RawExtrinsicMetadataCore, ) -from swh.loader.package.utils import download +from swh.loader.package.utils import cached_method, download logger = logging.getLogger(__name__) @@ -111,7 +111,6 @@ config_deposit = self.config["deposit"] self.deposit_id = deposit_id self.client = ApiClient(url=config_deposit["url"], auth=config_deposit["auth"]) - self.metadata: Dict[str, Any] = {} def get_versions(self) -> Sequence[str]: # only 1 branch 'HEAD' with no alias since we only have 1 snapshot @@ -119,7 +118,7 @@ return ["HEAD"] def get_metadata_authority(self) -> MetadataAuthority: - provider = self.metadata["origin_metadata"]["provider"] + provider = self.metadata()["origin_metadata"]["provider"] assert provider["provider_type"] == "deposit_client" return MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, @@ -131,7 +130,7 @@ ) def get_metadata_fetcher(self) -> MetadataFetcher: - tool = self.metadata["origin_metadata"]["tool"] + tool = self.metadata()["origin_metadata"]["tool"] return MetadataFetcher( name=tool["name"], version=tool["version"], metadata=tool["configuration"], ) @@ -140,7 +139,7 @@ self, version: str ) -> Iterator[Tuple[str, DepositPackageInfo]]: p_info = DepositPackageInfo.from_metadata( - self.metadata, url=self.url, filename="archive.zip", + self.metadata(), url=self.url, filename="archive.zip", ) yield "HEAD", p_info @@ -179,7 +178,7 @@ ) def get_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadataCore]: - origin_metadata = self.metadata["origin_metadata"] + origin_metadata = self.metadata()["origin_metadata"] return [ RawExtrinsicMetadataCore( format="sword-v2-atom-codemeta-v2-in-json", @@ -187,10 +186,15 @@ ) ] + @cached_method + def metadata(self): + """Returns metadata from the deposit server""" + return 
self.client.metadata_get(self.deposit_id) + def load(self) -> Dict: # First making sure the deposit is known prior to trigger a loading try: - self.metadata = self.client.metadata_get(self.deposit_id) + self.metadata() except ValueError: logger.error(f"Unknown deposit {self.deposit_id}, ignoring") return {"status": "failed"} diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -29,7 +29,7 @@ PackageLoader, RawExtrinsicMetadataCore, ) -from swh.loader.package.utils import api_info, release_name +from swh.loader.package.utils import api_info, cached_method, release_name logger = logging.getLogger(__name__) @@ -96,21 +96,22 @@ self._info: Dict[str, Any] = {} self._versions = None - @property - def info(self) -> Dict[str, Any]: + @cached_method + def _raw_info(self) -> bytes: + return api_info(self.provider_url) + + @cached_method + def info(self) -> Dict: """Return the project metadata information (fetched from npm registry) """ - if not self._info: - self._raw_info = api_info(self.provider_url) - self._info = json.loads(self._raw_info) - return self._info + return json.loads(self._raw_info()) def get_versions(self) -> Sequence[str]: - return sorted(list(self.info["versions"].keys())) + return sorted(list(self.info()["versions"].keys())) def get_default_version(self) -> str: - return self.info["dist-tags"].get("latest", "") + return self.info()["dist-tags"].get("latest", "") def get_metadata_authority(self): return MetadataAuthority( @@ -120,13 +121,13 @@ def get_extrinsic_snapshot_metadata(self): return [ RawExtrinsicMetadataCore( - format="replicate-npm-package-json", metadata=self._raw_info, + format="replicate-npm-package-json", metadata=self._raw_info(), ), ] def get_package_info(self, version: str) -> Iterator[Tuple[str, NpmPackageInfo]]: p_info = NpmPackageInfo.from_metadata( - project_metadata=self.info, version=version + project_metadata=self.info(), 
version=version ) yield release_name(version), p_info diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -411,7 +411,7 @@ object_type="snapshot", object_id=hash_to_hex(expected_snapshot_id) ) metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.FORGE, url="https://replicate.npmjs.com/", + type=MetadataAuthorityType.FORGE, url="https://npmjs.com/", ) expected_metadata = [ RawExtrinsicMetadata( @@ -463,7 +463,10 @@ "snapshot": 1, } == stats - loader._info = None # reset loader internal state + # reset loader internal state + del loader._cached_info + del loader._cached__raw_info + actual_load_status2 = loader.load() assert actual_load_status2["status"] == "eventful" snap_id2 = actual_load_status2["snapshot_id"] diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -27,7 +27,12 @@ PackageLoader, RawExtrinsicMetadataCore, ) -from swh.loader.package.utils import api_info, release_name, EMPTY_AUTHOR +from swh.loader.package.utils import ( + api_info, + cached_method, + release_name, + EMPTY_AUTHOR, +) logger = logging.getLogger(__name__) @@ -61,24 +66,24 @@ def __init__(self, url): super().__init__(url=url) - self._info = None self.provider_url = pypi_api_url(self.url) - @property + @cached_method + def _raw_info(self) -> bytes: + return api_info(self.provider_url) + + @cached_method def info(self) -> Dict: """Return the project metadata information (fetched from pypi registry) """ - if not self._info: - self._raw_info = api_info(self.provider_url) - self._info = json.loads(self._raw_info) - return self._info + return json.loads(self._raw_info()) def get_versions(self) -> Sequence[str]: - return self.info["releases"].keys() + return self.info()["releases"].keys() def get_default_version(self) -> str: - 
return self.info["info"]["version"] + return self.info()["info"]["version"] def get_metadata_authority(self): p_url = urlparse(self.url) @@ -91,13 +96,13 @@ def get_extrinsic_snapshot_metadata(self): return [ RawExtrinsicMetadataCore( - format="pypi-project-json", metadata=self._raw_info, + format="pypi-project-json", metadata=self._raw_info(), ), ] def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]: res = [] - for meta in self.info["releases"][version]: + for meta in self.info()["releases"][version]: if meta["packagetype"] != "sdist": continue p_info = PyPIPackageInfo.from_metadata(meta) diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -638,12 +638,13 @@ } == visit1_stats # Reset internal state - loader._info = None + del loader._cached__raw_info + del loader._cached_info visit2_actual_load_status = loader.load() visit2_stats = get_stats(loader.storage) - assert visit2_actual_load_status["status"] == "eventful" + assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status expected_snapshot_id2 = hash_to_bytes("2e5149a7b0725d18231a37b342e9b7c4e121f283") assert visit2_actual_load_status == { "status": "eventful", diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py --- a/swh/loader/package/utils.py +++ b/swh/loader/package/utils.py @@ -4,11 +4,12 @@ # See top-level LICENSE file for more information import copy +import functools import logging import os import requests -from typing import Dict, Optional, Tuple +from typing import Callable, Dict, Optional, Tuple, TypeVar from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE from swh.model.model import Person @@ -121,3 +122,23 @@ if filename: return "releases/%s/%s" % (version, filename) return "releases/%s" % version + + +TReturn = TypeVar("TReturn") +TSelf = TypeVar("TSelf") + +_UNDEFINED = 
object()  # right-hand side of the `_UNDEFINED = ` assignment begun on the preceding line


def cached_method(f: Callable[[TSelf], TReturn]) -> Callable[[TSelf], TReturn]:
    """Decorate a zero-argument method so its result is computed once per
    instance.

    The first call on a given instance runs ``f(self)`` and stores the result
    on that instance under the attribute ``_cached_<method name>`` (so a
    method called ``_raw_info`` is cached as ``_cached__raw_info``).  Later
    calls on the same instance return the stored value without calling ``f``
    again.

    Notes:

    - Any return value is cached, including ``None`` and other falsy values;
      a private sentinel (not ``None``) marks "not computed yet".
    - If ``f`` raises, nothing is stored and the next call runs ``f`` again.
    - To force a recomputation, delete the cache attribute from the instance,
      e.g. ``del obj._cached_info``.
    - The attribute name is derived from the method name only, so a subclass
      override decorated with ``cached_method`` shares the cache slot of the
      parent method it overrides.

    Args:
        f: the method to wrap; it must take no argument other than ``self``.

    Returns:
        A wrapper with the same name and docstring as ``f``.
    """
    cache_name = f"_cached_{f.__name__}"
    # Unique marker for "no cached value yet"; keeps None cacheable.
    not_computed = object()

    @functools.wraps(f)
    def newf(self):
        value = getattr(self, cache_name, not_computed)
        if value is not_computed:
            # Store only after f returned, so a failing call is not cached.
            value = f(self)
            setattr(self, cache_name, value)
        return value

    return newf