diff --git a/swh/lister/opam/lister.py b/swh/lister/opam/lister.py --- a/swh/lister/opam/lister.py +++ b/swh/lister/opam/lister.py @@ -7,7 +7,7 @@ import logging import os from subprocess import PIPE, Popen, call -from typing import Iterator, Optional +from typing import Any, Dict, Iterator, Optional from swh.lister.pattern import StatelessLister from swh.scheduler.interface import SchedulerInterface @@ -53,24 +53,12 @@ self.env = os.environ.copy() # Opam root folder is initialized in the :meth:`get_pages` method as no # side-effect should happen in the constructor to ease instantiation - self.opamroot = os.path.join(opam_root, self.instance) + self.opam_root = opam_root def get_pages(self) -> Iterator[PageType]: - # Initialize the opam root directory with the opam instance data to list. - call( - [ - "opam", - "init", - "--reinit", - "--bare", - "--no-setup", - "--root", - self.opamroot, - self.instance, - self.url, - ], - env=self.env, - ) + # Initialize the opam root directory + opam_init(self.opam_root, self.instance, self.url, self.env) + # Actually list opam instance data proc = Popen( [ @@ -78,10 +66,11 @@ "list", "--all", "--no-switch", + "--safe", "--repos", self.instance, "--root", - self.opamroot, + self.opam_root, "--normalise", "--short", ], @@ -103,9 +92,50 @@ url=url, last_update=None, extra_loader_arguments={ - "opam_root": self.opamroot, + "opam_root": self.opam_root, "opam_instance": self.instance, "opam_url": self.url, "opam_package": page, }, ) + + +def opam_init(opam_root: str, instance: str, url: str, env: Dict[str, Any]) -> None: + """Initialize an opam_root folder. + + Args: + opam_root: The opam root folder to initialize + instance: Name of the opam repository to add or initialize + url: The associated url of the opam repository to add or initialize + env: The global environment to use for the opam command. + + Returns: + None. + + """ + if not os.path.exists(opam_root) or not os.listdir(opam_root): + command = [ + "opam", + "init", + "--reinit", + "--bare", + "--no-setup", + "--root", + opam_root, + instance, + url, + ] + else: + # The repository exists and is populated, we just add another instance in the + # repository. If it's already setup, it's a noop + command = [ + "opam", + "repository", + "add", + "--root", + opam_root, + instance, + url, + ] + # Actually execute the command + call(command, env=env) diff --git a/swh/lister/opam/tests/test_lister.py b/swh/lister/opam/tests/test_lister.py --- a/swh/lister/opam/tests/test_lister.py +++ b/swh/lister/opam/tests/test_lister.py @@ -4,12 +4,13 @@ # See top-level LICENSE file for more information import io +import os from tempfile import mkdtemp from unittest.mock import MagicMock import pytest -from swh.lister.opam.lister import OpamLister +from swh.lister.opam.lister import OpamLister, opam_init module_name = "swh.lister.opam.lister" @@ -29,6 +30,66 @@ return mock_init, mock_open +def test_mock_init_repository_init(mock_opam, tmp_path, datadir): + """Fixture to bypass the actual opam calls within the test context. + + """ + mock_init, mock_popen = mock_opam + + instance = "fake" + instance_url = f"file://{datadir}/{instance}" + opam_root = str(tmp_path / "test-opam") + assert not os.path.exists(opam_root) + + # This will initialize an opam directory with the instance + opam_init(opam_root, instance, instance_url, {}) + + assert mock_init.called + assert mock_init.call_args.args[0] == [ + "opam", + "init", + "--reinit", + "--bare", + "--no-setup", + "--root", + opam_root, + instance, + instance_url, + ] + + +def test_mock_init_repository_update(mock_opam, tmp_path, datadir): + """Fixture to bypass the actual opam calls within the test context. + + """ + mock_init, mock_popen = mock_opam + + instance = "fake_opam_repo" + instance_url = f"file://{datadir}/{instance}" + opam_root = str(tmp_path / "test-opam") + + os.makedirs(opam_root, exist_ok=True) + with open(os.path.join(opam_root, "opam"), "w") as f: + f.write("one file to avoid empty folder") + + assert os.path.exists(opam_root) + assert os.listdir(opam_root) == ["opam"] # not empty + + # This will add a new instance + opam_init(opam_root, instance, instance_url, {}) + + assert mock_init.called + assert mock_init.call_args.args[0] == [ + "opam", + "repository", + "add", + "--root", + opam_root, + instance, + instance_url, + ] + + def test_lister_opam_optional_instance(swh_scheduler): """Instance name should be optional and default to be built out of the netloc.""" netloc = "opam.ocaml.org" @@ -36,21 +97,19 @@ lister = OpamLister(swh_scheduler, url=instance_url,) assert lister.instance == netloc - assert lister.opamroot.endswith(lister.instance) + assert lister.opam_root == "/tmp/opam/" def test_urls(swh_scheduler, mock_opam): mock_init, mock_popen = mock_opam - instance_url = "https://opam.ocaml.org" + tmp_folder = mkdtemp(prefix="swh_opam_lister") lister = OpamLister( - swh_scheduler, - url=instance_url, - instance="opam", - opam_root=mkdtemp(prefix="swh_opam_lister"), + swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder, ) assert lister.instance == "opam" + assert lister.opam_root == tmp_folder # call the lister and get all listed origins urls stats = lister.run() @@ -101,3 +160,32 @@ result_urls = [origin.url for origin in scheduler_origins] assert expected_urls == result_urls + + +def test_opam_multi_instance(datadir, swh_scheduler): + instance_url = f"file://{datadir}/fake_opam_repo" + + lister = OpamLister( + swh_scheduler, + url=instance_url, + instance="fake", + opam_root=mkdtemp(prefix="swh_opam_lister"), + ) + + stats = lister.run() + + assert stats.pages == 4 + assert stats.origins == 4 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + expected_urls = [ + f"opam+{instance_url}/packages/agrid/", + f"opam+{instance_url}/packages/calculon/", + f"opam+{instance_url}/packages/directories/", + f"opam+{instance_url}/packages/ocb/", + ] + + result_urls = [origin.url for origin in scheduler_origins] + + assert expected_urls == result_urls