Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/nixguix/loader.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | |||||
import requests | import requests | ||||
from typing import Dict, Optional, Any, Mapping | from typing import Dict, Optional, Any, Mapping | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Sha1Git, Revision, RevisionType | Sha1Git, Revision, RevisionType | ||||
) | ) | ||||
from swh.loader.package.utils import EMPTY_AUTHOR | from swh.loader.package.utils import EMPTY_AUTHOR | ||||
from swh.loader.package.loader import PackageLoader | from swh.loader.package.loader import PackageLoader | ||||
logger = logging.getLogger(__name__) | |||||
def retrieve_sources(url: str) -> Dict[str, Any]:
    """Fetch the sources file at ``url`` and return its decoded JSON content.

    Redirects are followed. The response body is decoded as UTF-8 before
    being parsed as JSON.

    Args:
        url: location of the sources.json file

    Returns:
        The parsed JSON document

    Raises:
        ValueError: if the HTTP status code of the response is not 200

    """
    response = requests.get(url, allow_redirects=True)
    if response.status_code != 200:
        # Exceptions do not interpolate their arguments the way logging
        # calls do: the message has to be formatted explicitly.
        raise ValueError("Got %d HTTP code on %s"
                         % (response.status_code, url))

    return json.loads(response.content.decode('utf-8'))
def clean_sources(sources: Dict[str, Any]) -> Dict[str, Any]:
    """Validate and clean the sources structure.

    First, it ensures all required top-level keys are present and that
    the structure version is supported. Then, it walks the sources list
    and removes every source that lacks a required key, has an
    unsupported type, or whose ``urls`` attribute is not a non-empty
    list.

    The ``sources`` argument is updated in place (its ``'sources'`` entry
    is replaced by the filtered list) and returned.

    Raises:
        ValueError: if a top-level key is missing or if the version of
          the structure is not supported

    """
    # Required top level keys
    required_keys = ['version', 'revision', 'sources']
    missing_keys = [key for key in required_keys if key not in sources]
    if missing_keys:
        # Exceptions do not interpolate their arguments: format explicitly.
        raise ValueError("sources structure invalid, missing: %s"
                         % ",".join(missing_keys))

    # Only the version 1 is currently supported
    if sources['version'] != 1:
        # '%s' rather than '%d' so that a non-integer version is reported
        # instead of triggering a TypeError while building the message.
        raise ValueError(
            "The sources structure version '%s' is not supported"
            % (sources['version'],))

    # If a source doesn't contain required attributes, this source is
    # skipped but others could still be archived.
    required_source_keys = ['urls', 'integrity', 'type']
    verified_sources = []
    for source in sources['sources']:
        missing = [key for key in required_source_keys if key not in source]
        if missing:
            for key in missing:
                logger.info("Skip source '%s' because key '%s' is missing",
                            source, key)
            # The checks below read source['type'] and source['urls']:
            # they must not run on a source lacking required keys.
            continue

        valid = True
        if source['type'] != 'url':
            logger.info(
                "Skip source '%s' because the type %s is not supported",
                source, source['type'])
            valid = False

        if not isinstance(source['urls'], list):
            logger.info(
                "Skip source '%s' because the urls attribute is not a list",
                source)
            valid = False
        elif not source['urls']:
            # The loader uses the first url of each source as its key,
            # so a source without any url can't be processed.
            logger.info(
                "Skip source '%s' because the urls attribute is empty",
                source)
            valid = False

        if valid:
            verified_sources.append(source)

    sources['sources'] = verified_sources
    return sources
class NixGuixLoader(PackageLoader): | class NixGuixLoader(PackageLoader): | ||||
"""Load sources from a sources.json file. This loader is used to load | """Load sources from a sources.json file. This loader is used to load | ||||
sources used by functional package manager (eg. Nix and Guix). | sources used by functional package manager (eg. Nix and Guix). | ||||
""" | """ | ||||
visit_type = 'nixguix' | visit_type = 'nixguix' | ||||
def __init__(self, url):
    """Fetch, validate and index the sources file located at ``url``.

    The document is retrieved with ``retrieve_sources`` and filtered
    with ``clean_sources``; the sources that remain are kept on the
    instance along with a mapping from the first url of each source to
    its integrity value.
    """
    super().__init__(url=url)
    cleaned = clean_sources(retrieve_sources(url))
    self.sources = cleaned['sources']
    self.provider_url = url

    # Index the integrity of every source by its first url.
    integrity_by_url = {}
    for source in self.sources:
        integrity_by_url[source['urls'][0]] = source['integrity']
    self._integrityByUrl = integrity_by_url

    # The revision used to create the sources.json file. For Nix,
    # this revision belongs to the github.com/nixos/nixpkgs
    # repository
    self.revision = cleaned['revision']
# Note: this could be renamed get_artifacts in the PackageLoader | # Note: this could be renamed get_artifacts in the PackageLoader | ||||
# base class. | # base class. | ||||
def get_versions(self): | def get_versions(self): | ||||
"""The first mirror of the mirror list is used as branch name in the | """The first mirror of the mirror list is used as branch name in the | ||||
snapshot. | snapshot. | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 68 Lines • Show Last 20 Lines |
must contain ;)