Changeset View
Standalone View
swh/loader/package/functional/loader.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import requests | import requests | ||||
from typing import Dict, Optional, Any | from typing import Dict, Optional, Any, Mapping | ||||
from swh.model import hashutil | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Sha1Git, Revision, RevisionType | Sha1Git, Revision, RevisionType | ||||
) | ) | ||||
from swh.loader.package.utils import EMPTY_AUTHOR | from swh.loader.package.utils import EMPTY_AUTHOR | ||||
from swh.loader.package.loader import PackageLoader | from swh.loader.package.loader import PackageLoader | ||||
Show All 10 Lines | |||||
class FunctionalLoader(PackageLoader): | class FunctionalLoader(PackageLoader): | ||||
"""Load sources from a sources.json file. This loader is used to load | """Load sources from a sources.json file. This loader is used to load | ||||
sources used by functional package managers (e.g. Nix and Guix). | sources used by functional package managers (e.g. Nix and Guix). | ||||
""" | """ | ||||
visit_type = 'functional' | visit_type = 'functional' | ||||
def __init__(self, url): | def __init__(self, url): | ||||
ardumont: types
```
def __init__(self, url: str, origin: Optional[str]):
``` | |||||
Done Inline ActionsThe origin argument has been removed. lewo: The origin argument has been removed. | |||||
super().__init__(url=url) | super().__init__(url=url) | ||||
self.sources = retrieve_sources(url)['sources'] | s = retrieve_sources(url) | ||||
self.sources = s['sources'] | |||||
self.provider_url = url | self.provider_url = url | ||||
Done Inline ActionsMaybe mention it's the git revision of nixpkgs/guix. ardumont: Maybe mention it's the git revision of nixpkgs/guix. | |||||
# The revision used to create the sources.json file. For Nix, | |||||
# this revision belongs to the github.com/nixos/nixpkgs | |||||
# repository | |||||
Done Inline Actionsnitpick, keep only what you need, that is drop self.sources here :) ardumont: nitpick, keep only what you need, that is drop self.sources here :) | |||||
Done Inline ActionsIt is also used in the get_versions method lewo: It is also used in the `get_versions` method | |||||
Not Done Inline Actionsheh right, i misread (it was folded) ardumont: heh right, i misread (it was folded)
so nvm, my bad ;) | |||||
self.revision = s['revision'] | |||||
# Note: this could be renamed get_artifacts in the PackageLoader | # Note: this could be renamed get_artifacts in the PackageLoader | ||||
# base class. | # base class. | ||||
def get_versions(self): | def get_versions(self): | ||||
# TODO: try all mirrors and not only the first one. A source | # TODO: try all mirrors and not only the first one. A source | ||||
# can be fetched from several urls, called mirrors. We | # can be fetched from several urls, called mirrors. We | ||||
# currently only use the first one, but if the first one | # currently only use the first one, but if the first one | ||||
# fails, we should try the second one and so on. | # fails, we should try the second one and so on. | ||||
Show All 9 Lines | def resolve_revision_from( | ||||
self, known_artifacts: Dict, artifact_metadata: Dict) \ | self, known_artifacts: Dict, artifact_metadata: Dict) \ | ||||
-> Optional[bytes]: | -> Optional[bytes]: | ||||
for rev_id, known_artifact in known_artifacts.items(): | for rev_id, known_artifact in known_artifacts.items(): | ||||
known_url = known_artifact['extrinsic']['raw']['url'] | known_url = known_artifact['extrinsic']['raw']['url'] | ||||
if artifact_metadata['url'] == known_url: | if artifact_metadata['url'] == known_url: | ||||
return rev_id | return rev_id | ||||
return None | return None | ||||
def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: | |||||
"""We add a branch to the snapshot called 'evaluation' pointing to the | |||||
Done Inline ActionsPlease add a docstring here describing this specific use of the branches hook. ardumont: Please add a docstring here describing this specific use of the branches hook. | |||||
revision used to generate the sources.json file. This revision | |||||
is specified in the sources.json file itself. For the nixpkgs | |||||
origin, this revision is coming from the | |||||
github.com/nixos/nixpkgs repository. | |||||
Note this repository is not loaded explicitly. So, this | |||||
pointer can target a nonexistent revision for a time. However, | |||||
Done Inline Actionspointer can target a nonexistent revision for a time. supposed to load ardumont: pointer can `target` a `nonexistent` revision for a time.
supposed `to` load | |||||
the github and gnu loaders are supposed to load this revision | |||||
and should create the revision pointed to by this branch. | |||||
This branch can be used to identify the snapshot associated with | |||||
a Nix/Guix evaluation. | |||||
""" | |||||
return { | |||||
b'evaluation': { | |||||
'target_type': 'revision', | |||||
'target': hashutil.hash_to_bytes(self.revision) | |||||
} | |||||
} | |||||
def build_revision(self, a_metadata: Dict, uncompressed_path: str, | def build_revision(self, a_metadata: Dict, uncompressed_path: str, | ||||
Done Inline ActionsInstead of mutating the reference here, make that return additional branches the main loader needs to append to its current list. The contract becomes clearer for that method. Maybe even rename hook_branches to extra_branches. ardumont: Instead of mutating the reference here, make that return additional branches the main loader… | |||||
Done Inline ActionsI initially wanted to create an extra_branches method, but it is difficult to know what to do if an extra branch overrides an existing one: since branches is a dict, we can not just append elements. I don't have a strong opinion on this. If you prefer, I could create the extra_branches method and raise an exception in the load method in case of overlap. lewo: I initially wanted to create an `extra_branches` method, but it is difficult to know what to do… | |||||
Done Inline Actions
Yes, I prefer this. ardumont: > I don't have a strong opinion on this. If you prefer, I could create the extra_branches… | |||||
directory: Sha1Git) -> Optional[Revision]: | directory: Sha1Git) -> Optional[Revision]: | ||||
return Revision( | return Revision( | ||||
type=RevisionType.TAR, | type=RevisionType.TAR, | ||||
message=b'', | message=b'', | ||||
author=EMPTY_AUTHOR, | author=EMPTY_AUTHOR, | ||||
date=None, | date=None, | ||||
committer=EMPTY_AUTHOR, | committer=EMPTY_AUTHOR, | ||||
committer_date=None, | committer_date=None, | ||||
Show All 11 Lines |
types