diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -35,7 +35,7 @@ INSTANCE = "crates" INDEX_REPOSITORY_URL = "https://github.com/rust-lang/crates.io-index.git" - INDEX_LOCAL_PATH = "/tmp/crates.io-index" + DESTINATION_PATH = "/tmp/crates.io-index" CRATE_FILE_URL_PATTERN = ( "https://static.crates.io/crates/{crate}/{crate}-{version}.crate" ) @@ -50,30 +50,23 @@ instance=self.INSTANCE, ) - def get_index_repository( - self, - index_repository_url=INDEX_REPOSITORY_URL, - index_local_path=INDEX_LOCAL_PATH, - ) -> None: + def get_index_repository(self) -> None: """Get crates.io-index.get repository up to date""" - if not os.path.exists(index_local_path): - subprocess.check_call( - ["git", "clone", index_repository_url, index_local_path,] - ) - else: - subprocess.check_call(["git", "-C", index_local_path, "fetch", "--all"]) + subprocess.check_call( + ["git", "clone", self.INDEX_REPOSITORY_URL, self.DESTINATION_PATH,] + ) - def get_crates_index(self, index_local_path=INDEX_LOCAL_PATH) -> List[Path]: + def get_crates_index(self) -> List[Path]: """Build a sorted list of file path excluding dotted directories and dotted files""" crates_index = sorted( [ path - for path in Path(index_local_path).rglob("*") + for path in Path(self.DESTINATION_PATH).rglob("*") if not any(part.startswith(".") for part in path.parts) and path.is_file() - and str(path) != os.path.join(index_local_path, "config.json") + and str(path) != os.path.join(self.DESTINATION_PATH, "config.json") ] ) @@ -89,7 +82,6 @@ self.get_index_repository() crates_index = self.get_crates_index() - logger.debug("found %s crates in crates_index" % len(crates_index)) for crate in crates_index: diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py --- a/swh/lister/crates/tests/test_lister.py +++ b/swh/lister/crates/tests/test_lister.py @@ -3,79 +3,85 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.lister.crates.lister import CratesLister - - -def mock_git_clone(self, *args, **kwargs): - if "clone" in args: - print("clone") - pass - - -def mock_git_pull(self, *args, **kwargs): - if "clone" in args: - print("clone") - if "pull" in args: - print("pull") - pass - - -def mock_repository(mock): - pass - - -def test_get_index_repository(): - pass - - -def test_get_crates_index(): - pass - - -def test_get_pages(): - pass +from pathlib import Path +from swh.lister.crates.lister import CratesLister +from swh.lister.crates.tests import prepare_repository_from_archive + +expected_origins = [ + { + "name": "rand", + "version": "0.1.1", + "checksum": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", + "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", + }, + { + "name": "rand", + "version": "0.1.2", + "checksum": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", + "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", + }, + { + "name": "regex", + "version": "0.1.0", + "checksum": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", + "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate", + }, + { + "name": "regex", + "version": "0.1.1", + "checksum": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", + "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate", + }, + { + "name": "regex", + "version": "0.1.2", + "checksum": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", + "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate", + }, + { + "name": "regex", + "version": "0.1.3", + "checksum": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", + "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate", + }, + { + "name": "regex-syntax", + "version": "0.1.0", + "checksum": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", + "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", + }, +] + + +def test_crates_lister(datadir, tmp_path, swh_scheduler): + archive_path = Path(datadir, "fake-crates-repository.tar.gz") + repo_url = prepare_repository_from_archive( + archive_path, "crates.io-index", tmp_path + ) -def test_crates_lister(swh_scheduler): lister = CratesLister(scheduler=swh_scheduler) - assert lister is not None - - -# def test_mock_init_repository_init(mock_crates, tmp_path, datadir): -# """Initializing crates root directory with an instance should be ok -# -# """ -# mock_init, mock_popen = mock_crates -# -# instance = "fake" -# instance_url = f"file://{datadir}/{instance}" -# crates_root = str(tmp_path / "test-crates") -# assert not os.path.exists(crates_root) -# -# # This will initialize an crates directory with the instance -# crates_init(crates_root, instance, instance_url, {}) -# -# assert mock_init.called -# -# -# def test_mock_init_repository_update(mock_crates, tmp_path, datadir): -# """Updating crates root directory with another instance should be ok -# -# """ -# mock_init, mock_popen = mock_crates -# -# instance = "fake_crates_repo" -# instance_url = f"file://{datadir}/{instance}" -# crates_root = str(tmp_path / "test-crates") -# -# os.makedirs(crates_root, exist_ok=True) -# with open(os.path.join(crates_root, "crates"), "w") as f: -# f.write("one file to avoid empty folder") -# -# assert os.path.exists(crates_root) -# assert os.listdir(crates_root) == ["crates"] # not empty -# # This will update the repository crates with another instance -# crates_init(crates_root, instance, instance_url, {}) -# -# assert mock_init.called -# + lister.INDEX_REPOSITORY_URL = repo_url + lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" + + res = lister.run() + + assert res.pages == 3 + assert res.origins == 7 + + expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url")) + scheduler_origins_sorted = sorted( + swh_scheduler.get_listed_origins(lister.lister_obj.id).results, + key=lambda x: x.url, + ) + + for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted): + assert scheduled.visit_type == "rust-crate" + assert scheduled.url == expected.get("url") + assert scheduled.extra_loader_arguments.get("name") == expected.get("name") + assert scheduled.extra_loader_arguments.get("version") == expected.get( + "version" + ) + assert scheduled.extra_loader_arguments.get("checksum") == expected.get( + "checksum" + )