Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/opam/lister.py
- This file was added.
# Copyright (C) 2021 The Software Heritage developers | |||||||||||||||
# See the AUTHORS file at the top-level directory of this distribution | |||||||||||||||
# License: GNU General Public License version 3, or any later version | |||||||||||||||
# See top-level LICENSE file for more information | |||||||||||||||
import io | |||||||||||||||
import logging | |||||||||||||||
import os | |||||||||||||||
from subprocess import PIPE, Popen, call | |||||||||||||||
import tempfile | |||||||||||||||
from typing import Iterator | |||||||||||||||
from swh.lister.pattern import StatelessLister | |||||||||||||||
from swh.scheduler.interface import SchedulerInterface | |||||||||||||||
from swh.scheduler.model import ListedOrigin | |||||||||||||||
from ..pattern import CredentialsType | |||||||||||||||
logger = logging.getLogger(__name__) | |||||||||||||||
PageType = str | |||||||||||||||
class OpamLister(StatelessLister[PageType]):
    """
    List all repositories hosted on an opam repository.

    On initialisation, we create an opam root, with no ocaml compiler (no switch)
    as we won't need it and it's costly. In this opam root, we add a single opam
    repository (url) and give it a name (instance). Then, to get pages, we just ask
    opam to list all the packages for our opam repository in our opam root.

    Each page is a single package name (one line of the ``opam list`` output), and
    each page yields exactly one origin.

    Args:
        url: base URL of an opam repository
            (for instance https://opam.ocaml.org)
        instance: string identifier for the listed repository

    """

    # Part of the lister API, that identifies this lister
    LISTER_NAME = "opam"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: str,
        credentials: CredentialsType = None,
    ):
        """Create a dedicated opam root and register the repository in it.

        Args:
            scheduler: scheduler the listed origins are sent to
            url: base URL of the opam repository
            instance: name under which the repository is registered in the opam root
            credentials: optional credentials, forwarded to the parent lister
        """
        super().__init__(
            scheduler=scheduler, credentials=credentials, url=url, instance=instance,
        )
        # Run every opam command against a private opam root so that the user's
        # own opam setup is never touched. Nothing in this class removes that
        # temporary directory afterwards.
        self.env = os.environ.copy()
        self.env["OPAMROOT"] = tempfile.mkdtemp(prefix="swh_opam_lister")
        returncode = call(
            ["opam", "init", "--reinit", "--bare", "--no-setup", instance, url],
            env=self.env,
        )
        if returncode != 0:
            # Keep the constructor best-effort (no exception), but do not let a
            # failed bootstrap go unnoticed: listing would silently yield nothing.
            logger.error(
                "opam init exited with status %s for instance %s (%s)",
                returncode,
                instance,
                url,
            )

    def get_pages(self) -> Iterator[PageType]:
        """Yield the name of every package known to the opam repository.

        The names are read, one per line, from the standard output of
        ``opam list`` run against the opam root created in ``__init__``.
        """
        command = [
            "opam",
            "list",
            "--all",
            "--no-switch",
            "--repos",
            self.instance,
            "--normalise",
            "--short",
        ]
        # The context manager closes the pipe and waits for the child process
        # when the generator is exhausted or closed, so that neither a file
        # descriptor nor a zombie process is left behind.
        with Popen(command, env=self.env, stdout=PIPE) as proc:
            if proc.stdout is not None:
                for line in io.TextIOWrapper(proc.stdout):
                    yield line.rstrip("\n")

    def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]:
        """Convert a page of OpamLister repositories into a list of ListedOrigins"""
        assert self.lister_obj.id is not None
        # a page is just a package name
        url = f"opam+{self.url}/packages/{page}/"
        yield ListedOrigin(
            lister_id=self.lister_obj.id, visit_type="opam", url=url, last_update=None
        )
Can you please give some more details on how the opam command actually does the
bootstrap listing? I think that would make it easier to understand what the lister
does, and would also help avoid losing sight of what happens in that lister later.
Thanks in advance.