Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/nixguix/loader.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
import requests | import requests | ||||
from typing import Any, Dict, Iterator, Mapping, Optional, Tuple | from typing import Any, Dict, Iterator, Mapping, Optional, Tuple | ||||
import attr | import attr | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | |||||
MetadataAuthorityType, | |||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
TargetType, | TargetType, | ||||
Snapshot, | Snapshot, | ||||
BaseModel, | BaseModel, | ||||
Sha1Git, | Sha1Git, | ||||
) | ) | ||||
from swh.loader.package.utils import EMPTY_AUTHOR | from swh.loader.package.utils import EMPTY_AUTHOR | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import ( | ||||
BasePackageInfo, | |||||
PackageLoader, | |||||
RawExtrinsicMetadataCore, | |||||
) | |||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
@attr.s | @attr.s | ||||
class NixGuixPackageInfo(BasePackageInfo): | class NixGuixPackageInfo(BasePackageInfo): | ||||
raw_info = attr.ib(type=Dict[str, Any]) | raw_info = attr.ib(type=Dict[str, Any]) | ||||
Show All 17 Lines | class NixGuixLoader(PackageLoader[NixGuixPackageInfo]): | ||||
sources used by functional package managers (e.g. Nix and Guix). | sources used by functional package managers (e.g. Nix and Guix). | ||||
""" | """ | ||||
visit_type = "nixguix" | visit_type = "nixguix" | ||||
def __init__(self, url):
    """Download, parse and validate the sources file located at ``url``.

    The undecoded payload is kept in ``self.raw_sources``, the cleaned
    source entries in ``self.sources``, and ``url`` itself is recorded as
    ``self.provider_url``.
    """
    super().__init__(url=url)
    self.raw_sources = retrieve_sources(url)
    parsed = parse_sources(self.raw_sources)
    cleaned = clean_sources(parsed)
    self.sources = cleaned["sources"]
    self.provider_url = url

    # Map the first mirror URL of every source to its integrity value.
    integrity_by_url = {}
    for source in self.sources:
        integrity_by_url[source["urls"][0]] = source["integrity"]
    self._integrityByUrl = integrity_by_url

    # The revision used to create the sources.json file. For Nix,
    # this revision belongs to the github.com/nixos/nixpkgs
    # repository
    self.revision = cleaned["revision"]
# Note: this could be renamed get_artifacts in the PackageLoader | # Note: this could be renamed get_artifacts in the PackageLoader | ||||
# base class. | # base class. | ||||
def get_versions(self):
    """Return the first-mirror URL of every known source.

    Each of these URLs is used as a branch name in the snapshot.
    """
    integrity_by_url = self._integrityByUrl
    return integrity_by_url.keys()
def get_metadata_authority(self):
    """Return a FORGE-type metadata authority identified by ``self.url``.

    The authority carries an empty ``metadata`` mapping.
    """
    authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url=self.url,
        metadata={},
    )
    return authority
def get_extrinsic_snapshot_metadata(self):
    """Return the raw sources payload as a one-item metadata list.

    The single entry wraps ``self.raw_sources`` under the
    ``nixguix-sources-json`` format, with no discovery date set.
    """
    sources_metadata = RawExtrinsicMetadataCore(
        format="nixguix-sources-json",
        metadata=self.raw_sources,
        discovery_date=None,
    )
    return [sources_metadata]
# Note: this could be renamed get_artifact_info in the PackageLoader | # Note: this could be renamed get_artifact_info in the PackageLoader | ||||
# base class. | # base class. | ||||
def get_package_info(self, url) -> Iterator[Tuple[str, NixGuixPackageInfo]]: | def get_package_info(self, url) -> Iterator[Tuple[str, NixGuixPackageInfo]]: | ||||
# TODO: try all mirrors and not only the first one. A source | # TODO: try all mirrors and not only the first one. A source | ||||
# can be fetched from several urls, called mirrors. We | # can be fetched from several urls, called mirrors. We | ||||
# currently only use the first one, but if the first one | # currently only use the first one, but if the first one | ||||
# fails, we should try the second one and so on. | # fails, we should try the second one and so on. | ||||
integrity = self._integrityByUrl[url] | integrity = self._integrityByUrl[url] | ||||
▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines | ) -> Optional[Revision]: | ||||
"provider": self.provider_url, | "provider": self.provider_url, | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": p_info.raw_info, | "raw": p_info.raw_info, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
def retrieve_sources(url: str) -> bytes:
    """Download the sources file at ``url`` and return its raw body.

    Args:
        url: location of the sources file; HTTP redirects are followed.

    Returns:
        The undecoded response body.

    Raises:
        ValueError: if the final HTTP status code is not 200.
    """
    response = requests.get(url, allow_redirects=True)
    if response.status_code != 200:
        # Exceptions do not apply logging-style lazy %-formatting to their
        # arguments, so the message must be interpolated before raising;
        # otherwise str(exc) is the raw (template, code, url) tuple.
        raise ValueError("Got %d HTTP code on %s" % (response.status_code, url))
    return response.content
def parse_sources(raw_sources: bytes) -> Dict[str, Any]:
    """Decode the raw sources payload as UTF-8 and parse it as JSON."""
    text = raw_sources.decode("utf-8")
    return json.loads(text)
def clean_sources(sources: Dict[str, Any]) -> Dict[str, Any]: | def clean_sources(sources: Dict[str, Any]) -> Dict[str, Any]: | ||||
"""Validate and clean the sources structure. First, it ensures all top | """Validate and clean the sources structure. First, it ensures all top | ||||
level keys are present. Then, it walks the sources list | level keys are present. Then, it walks the sources list | ||||
and removes sources that don't contain required keys. | and removes sources that don't contain required keys. | ||||
Raises: | Raises: | ||||
▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines |