Changeset View
Standalone View
swh/loader/package/functional/loader.py
- This file was added.
# Copyright (C) 2020 The Software Heritage developers | |||||
ardumont: Not urgent but please add the copyright headers at some point. | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import json | |||||
import requests | |||||
from typing import Dict, Optional, Any | |||||
from swh.model.model import ( | |||||
Sha1Git, Revision, RevisionType | |||||
) | |||||
from swh.loader.package.utils import EMPTY_AUTHOR | |||||
Done Inline ActionsPlease add a docstring, even if it's a basic one. Also add the visit_type attribute, it's a text form. visit_type = 'functional' (or something) ardumont: Please add a docstring, even if it's a basic one.
Also add the visit_type attribute, it's a… | |||||
from swh.loader.package.loader import PackageLoader | |||||
class FunctionalLoader(PackageLoader): | |||||
"""Load sources from a sources.json file. This loader is used to load | |||||
Done Inline ActionsPlease for new code, try to add the types. ardumont: Please for new code, try to add the types. | |||||
sources used by functional package managers (e.g. Nix and Guix).
""" | |||||
visit_type = 'functional' | |||||
def __init__(self, url, origin=None):
    """Set up the loader and eagerly fetch the list of sources.

    Args:
        url: location of the JSON document listing the sources; it is
            also stored as ``provider_url``.
        origin: forwarded unchanged to the parent constructor.
    """
    super().__init__(url=url, origin=origin)
    # The sources document is downloaded at construction time, so
    # instantiating the loader performs an HTTP request and may raise.
    manifest = self._retrieve_sources()
    self.sources = manifest['sources']
    self.provider_url = url
def _retrieve_sources(self) -> Dict[str, Any]:
    """Download and decode the JSON document located at ``self.url``.

    HTTP redirects are followed. The response body is decoded as UTF-8
    and then parsed as JSON.

    Returns:
        The parsed JSON document.

    Raises:
        ValueError: if the server answers with any status code other
            than 200. Decoding or parsing failures of the body also
            surface as ``ValueError`` subclasses.
    """
    response = requests.get(self.url, allow_redirects=True)
    if response.status_code != 200:
        # Interpolate the message explicitly: unlike logging calls,
        # exception constructors do not apply %-formatting to extra
        # positional arguments, they just store them in ``args``.
        raise ValueError("Got %d HTTP code on %s"
                         % (response.status_code, self.url))
    return json.loads(response.content.decode('utf-8'))
Done Inline ActionsI'd make this a function with the url as parameter. This way, this can be tested independently of the loader instantiation. ardumont: I'd make this a function with the url as parameter.
This way, this can be tested independently… | |||||
# This could be renamed get_artifacts | |||||
def get_versions(self):
    """Return the first URL of every entry in ``self.sources``.

    A source can be fetched from several urls, called mirrors; only
    the first mirror of each source is returned.
    """
    # TODO: try all mirrors and not only the first one. If the first
    # one fails, we should try the second one and so on.
    first_mirrors = []
    for entry in self.sources:
        mirrors = entry['url']
        first_mirrors.append(mirrors[0])
    return first_mirrors
# This could be renamed get_artifact_info | |||||
def get_package_info(self, source):
    """Yield the single ``(key, info)`` pair describing ``source``.

    The key is ``source`` itself; the info dict carries it under
    ``'url'`` and again, nested, under ``'raw'``.
    """
    # TODO: we need to provide the sha256 of the source also
    raw_info = {'url': source}
    package_info = {'url': source, 'raw': raw_info}
    yield source, package_info
def resolve_revision_from(
        self, known_artifacts: Dict, artifact_metadata: Dict) \
        -> Optional[bytes]:
    """Find the already-known revision matching an artifact, if any.

    An entry of ``known_artifacts`` matches when the url stored under
    its ``['extrinsic']['raw']['url']`` equals
    ``artifact_metadata['url']``.

    Returns:
        The key of the first matching entry (in iteration order), or
        None when nothing matches.
    """
    matching_ids = (
        revision_id
        for revision_id, artifact in known_artifacts.items()
        if artifact['extrinsic']['raw']['url'] == artifact_metadata['url']
    )
    return next(matching_ids, None)
Done Inline ActionsAdd the following entries to decrease the divergence between loaders. 'provider': self.provider_url, 'when': self.visit_date.isoformat(), provider_url is a canonical url providing the information. ardumont: Add the following entries to decrease the divergence between loaders.
```… | |||||
def build_revision(self, a_metadata: Dict, uncompressed_path: str,
                   directory: Sha1Git) -> Optional[Revision]:
    """Build the synthetic revision recorded for one loaded artifact.

    The revision is of type ``tar``, has an empty message, uses
    ``EMPTY_AUTHOR`` as both author and committer, carries no dates
    and no parents, and points to ``directory``.

    Args:
        a_metadata: artifact metadata, stored as-is under
            ``metadata['extrinsic']['raw']``.
        uncompressed_path: not used by this implementation.
        directory: identifier of the directory the revision targets.

    Returns:
        The newly built revision.
    """
    # NOTE(review): recording a pointer to the nixpkgs revision that
    # generated sources.json was discussed for this metadata; per the
    # review thread it is handled in a separate diff (D2807).
    extrinsic_metadata = {
        'provider': self.provider_url,
        'when': self.visit_date.isoformat(),
        'raw': a_metadata,
    }
    return Revision(
        type=RevisionType.TAR,
        message=b'',
        author=EMPTY_AUTHOR,
        committer=EMPTY_AUTHOR,
        date=None,
        committer_date=None,
        parents=[],
        directory=directory,
        synthetic=True,
        metadata={'extrinsic': extrinsic_metadata},
    )
Not Done Inline Actions Even though the main functionality is incomplete, this feels pretty empty. @douardda what do you think, couldn't we add more stuff in there? ardumont: Even though the main functionality is incomplete, this feels pretty empty.
@douardda what do… |
Not urgent but please add the copyright headers at some point.