diff --git a/swh/lister/opam/lister.py b/swh/lister/opam/lister.py --- a/swh/lister/opam/lister.py +++ b/swh/lister/opam/lister.py @@ -7,7 +7,7 @@ import logging import os from subprocess import PIPE, Popen, call -from typing import Iterator, Optional +from typing import Any, Dict, Iterator, Optional from swh.lister.pattern import StatelessLister from swh.scheduler.interface import SchedulerInterface @@ -53,24 +53,12 @@ self.env = os.environ.copy() # Opam root folder is initialized in the :meth:`get_pages` method as no # side-effect should happen in the constructor to ease instantiation - self.opamroot = os.path.join(opam_root, self.instance) + self.opam_root = opam_root def get_pages(self) -> Iterator[PageType]: - # Initialize the opam root directory with the opam instance data to list. - call( - [ - "opam", - "init", - "--reinit", - "--bare", - "--no-setup", - "--root", - self.opamroot, - self.instance, - self.url, - ], - env=self.env, - ) + # Initialize the opam root directory + opam_init(self.opam_root, self.instance, self.url, self.env) + # Actually list opam instance data proc = Popen( [ @@ -81,7 +69,7 @@ "--repos", self.instance, "--root", - self.opamroot, + self.opam_root, "--normalise", "--short", ], @@ -103,9 +91,50 @@ url=url, last_update=None, extra_loader_arguments={ - "opam_root": self.opamroot, + "opam_root": self.opam_root, "opam_instance": self.instance, "opam_url": self.url, "opam_package": page, }, ) + + +def opam_init(opam_root: str, instance: str, url: str, env: Dict[str, Any]) -> None: + """Initialize an opam_root folder. + + Args: + opam_root: The opam root folder to initialize + instance: Name of the opam repository to add or initialize + url: The associated url of the opam repository to add or initialize + env: The global environment to use for the opam command. + + Returns: + None. + + """ + if not os.path.exists(opam_root) or not os.listdir(opam_root): + command = [ + "opam", + "init", + "--reinit", + "--bare", + "--no-setup", + "--root", + opam_root, + instance, + url, + ] + else: + # The repository exists and is populated, we just add another instance in the + # repository. If it's already setup, it's a noop + command = [ + "opam", + "repository", + "add", + "--root", + opam_root, + instance, + url, + ] + # Actually execute the command + call(command, env=env) diff --git a/swh/lister/opam/tests/test_lister.py b/swh/lister/opam/tests/test_lister.py --- a/swh/lister/opam/tests/test_lister.py +++ b/swh/lister/opam/tests/test_lister.py @@ -36,21 +36,19 @@ lister = OpamLister(swh_scheduler, url=instance_url,) assert lister.instance == netloc - assert lister.opamroot.endswith(lister.instance) + assert lister.opam_root == "/tmp/opam/" def test_urls(swh_scheduler, mock_opam): mock_init, mock_popen = mock_opam - instance_url = "https://opam.ocaml.org" + tmp_folder = mkdtemp(prefix="swh_opam_lister") lister = OpamLister( - swh_scheduler, - url=instance_url, - instance="opam", - opam_root=mkdtemp(prefix="swh_opam_lister"), + swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder, ) assert lister.instance == "opam" + assert lister.opam_root == tmp_folder # call the lister and get all listed origins urls stats = lister.run() @@ -101,3 +99,32 @@ result_urls = [origin.url for origin in scheduler_origins] assert expected_urls == result_urls + + +def test_opam_multi_instance(datadir, swh_scheduler): + instance_url = f"file://{datadir}/fake_opam_repo" + + lister = OpamLister( + swh_scheduler, + url=instance_url, + instance="fake", + opam_root=mkdtemp(prefix="swh_opam_lister"), + ) + + stats = lister.run() + + assert stats.pages == 4 + assert stats.origins == 4 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + expected_urls = [ + f"opam+{instance_url}/packages/agrid/", + f"opam+{instance_url}/packages/calculon/", + f"opam+{instance_url}/packages/directories/", + f"opam+{instance_url}/packages/ocb/", + ] + + result_urls = [origin.url for origin in scheduler_origins] + + assert expected_urls == result_urls