diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,7 @@
         lister.gnu=swh.lister.gnu:register
         lister.launchpad=swh.lister.launchpad:register
         lister.npm=swh.lister.npm:register
+        lister.opam=swh.lister.opam:register
         lister.packagist=swh.lister.packagist:register
         lister.phabricator=swh.lister.phabricator:register
         lister.pypi=swh.lister.pypi:register
diff --git a/swh/lister/opam/__init__.py b/swh/lister/opam/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/opam/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2019-2021 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Return the lister class and task modules of the opam lister"""
+    from .lister import OpamLister
+
+    return {
+        "lister": OpamLister,
+        "task_modules": ["%s.tasks" % __name__],
+    }
diff --git a/swh/lister/opam/lister.py b/swh/lister/opam/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/opam/lister.py
@@ -0,0 +1,95 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import io
+import logging
+import os
+from subprocess import PIPE, Popen, call
+import tempfile
+from typing import Iterator
+
+from swh.lister.pattern import StatelessLister
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+from ..pattern import CredentialsType
+
+logger = logging.getLogger(__name__)
+
+PageType = str
+
+
+class OpamLister(StatelessLister[PageType]):
+    """
+    List all repositories hosted on an opam repository.
+
+    A dedicated opam root is created in a temporary directory and
+    initialized from the repository when the lister is instantiated, using
+    the ``opam`` command-line tool.
+
+    Args:
+        url: base URL of an opam repository
+            (for instance https://opam.ocaml.org)
+        instance: string identifier for the listed repository
+
+    """
+
+    # Part of the lister API, that identifies this lister
+    LISTER_NAME = "opam"
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        url: str,
+        instance: str,
+        credentials: CredentialsType = None,
+    ):
+        super().__init__(
+            scheduler=scheduler, credentials=credentials, url=url, instance=instance,
+        )
+        # NOTE(review): nothing in this module removes this directory afterwards
+        self.opamroot_path: str = tempfile.mkdtemp()
+        self.env = os.environ.copy()
+        self.env["OPAMROOT"] = self.opamroot_path
+        # Use an argument list rather than a shell command line, so that
+        # `instance` and `url` are never interpreted by a shell
+        command = ["opam", "init", "--reinit", "--bare", "--no-setup", instance, url]
+        try:
+            returncode = call(command, env=self.env)
+        except OSError:
+            # os.system() did not raise when opam could not be executed, so
+            # keep the constructor non-fatal and only report the problem
+            logger.exception("Could not run %s", command)
+        else:
+            if returncode != 0:
+                logger.error("%s exited with status %s", command, returncode)
+
+    def get_pages(self) -> Iterator[PageType]:
+        """Yield the package names printed by ``opam list``, one name per page"""
+        command = [
+            "opam",
+            "list",
+            "--all",
+            "--no-switch",
+            "--repos",
+            self.instance,
+            "--normalise",
+            "--short",
+        ]
+        # Leaving the `with` block closes the pipe and waits for opam to exit
+        with Popen(command, env=self.env, stdout=PIPE) as proc:
+            if proc.stdout is None:
+                raise RuntimeError("opam list has no stdout (get_pages)")
+            for line in io.TextIOWrapper(proc.stdout):
+                yield line.rstrip("\n")
+
+    def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]:
+        """Convert a page of OpamLister repositories into a list of ListedOrigins"""
+        assert self.lister_obj.id is not None
+        # a page is just a package name
+        url = "opam+{}/packages/{}/".format(self.url, page)
+        yield ListedOrigin(
+            lister_id=self.lister_obj.id, visit_type="opam", url=url, last_update=None
+        )
diff --git a/swh/lister/opam/tasks.py b/swh/lister/opam/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/opam/tasks.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.lister.opam.lister import OpamLister
+
+
+@shared_task(name=__name__ + ".OpamListerTask")
+def list_opam(**lister_args):
+    """Lister task for the Opam registry"""
+    return OpamLister.from_configfile(**lister_args).run().dict()
+
+
+@shared_task(name=__name__ + ".ping")
+def _ping():
+    return "OK"
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -16,4 +16,5 @@
     },
     "gitea": {"url": "https://try.gitea.io/api/v1/",},
     "gitlab": {"url": "https://gitlab.ow2.org/api/v4", "instance": "ow2",},
+    "opam": {"url": "https://opam.ocaml.org", "instance": "opam"},
 }