Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/cran/tests/test_cran.py
Show First 20 Lines • Show All 115 Lines • ▼ Show 20 Lines | for date, expected_date in data: | ||||
if expected_date is None: | if expected_date is None: | ||||
assert actual_tstz is None, date | assert actual_tstz is None, date | ||||
else: | else: | ||||
expected_tstz = TimestampWithTimezone.from_datetime(expected_date) | expected_tstz = TimestampWithTimezone.from_datetime(expected_date) | ||||
assert actual_tstz == expected_tstz, date | assert actual_tstz == expected_tstz, date | ||||
@pytest.mark.fs | @pytest.mark.fs | ||||
def test_extract_intrinsic_metadata(tmp_path, datadir): | def test_cran_extract_intrinsic_metadata(tmp_path, datadir): | ||||
"""Parsing existing archive's PKG-INFO should yield results""" | """Parsing existing archive's PKG-INFO should yield results""" | ||||
uncompressed_archive_path = str(tmp_path) | uncompressed_archive_path = str(tmp_path) | ||||
# sample url | # sample url | ||||
# https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz # noqa | # https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz # noqa | ||||
archive_path = path.join( | archive_path = path.join( | ||||
datadir, | datadir, | ||||
"https_cran.r-project.org", | "https_cran.r-project.org", | ||||
"src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz", | "src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz", | ||||
Show All 14 Lines | expected_metadata = { | ||||
"License": "Unlimited use and distribution (see LICENCE).", | "License": "Unlimited use and distribution (see LICENCE).", | ||||
"URL": "http://www.biostat.harvard.edu/~mwand", | "URL": "http://www.biostat.harvard.edu/~mwand", | ||||
} | } | ||||
assert actual_metadata == expected_metadata | assert actual_metadata == expected_metadata | ||||
@pytest.mark.fs | @pytest.mark.fs | ||||
def test_extract_intrinsic_metadata_failures(tmp_path): | def test_cran_extract_intrinsic_metadata_failures(tmp_path): | ||||
"""Parsing inexistent path/archive/PKG-INFO yields an empty dict""" | """Parsing inexistent path/archive/PKG-INFO yields an empty dict""" | ||||
# inexistent first level path | # inexistent first level path | ||||
assert extract_intrinsic_metadata("/something-inexistent") == {} | assert extract_intrinsic_metadata("/something-inexistent") == {} | ||||
# inexistent second level path (as expected by pypi archives) | # inexistent second level path (as expected by pypi archives) | ||||
assert extract_intrinsic_metadata(tmp_path) == {} | assert extract_intrinsic_metadata(tmp_path) == {} | ||||
# inexistent PKG-INFO within second level path | # inexistent PKG-INFO within second level path | ||||
existing_path_no_pkginfo = str(tmp_path / "something") | existing_path_no_pkginfo = str(tmp_path / "something") | ||||
os.mkdir(existing_path_no_pkginfo) | os.mkdir(existing_path_no_pkginfo) | ||||
assert extract_intrinsic_metadata(tmp_path) == {} | assert extract_intrinsic_metadata(tmp_path) == {} | ||||
def test_cran_one_visit(swh_config, requests_mock_datadir): | def test_cran_one_visit(swh_storage, requests_mock_datadir): | ||||
version = "2.22-6" | version = "2.22-6" | ||||
base_url = "https://cran.r-project.org" | base_url = "https://cran.r-project.org" | ||||
origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html" | origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html" | ||||
artifact_url = ( | artifact_url = ( | ||||
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa | f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa | ||||
) | ) | ||||
loader = CRANLoader( | loader = CRANLoader( | ||||
origin_url, artifacts=[{"url": artifact_url, "version": version,}] | swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version,}] | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": SNAPSHOT.id.hex(), | "snapshot_id": SNAPSHOT.id.hex(), | ||||
} | } | ||||
check_snapshot(SNAPSHOT, loader.storage) | check_snapshot(SNAPSHOT, swh_storage) | ||||
assert_last_visit_matches(loader.storage, origin_url, status="full", type="cran") | assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran") | ||||
visit_stats = get_stats(loader.storage) | visit_stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 33, | "content": 33, | ||||
"directory": 7, | "directory": 7, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == visit_stats | } == visit_stats | ||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith(base_url) | if m.url.startswith(base_url) | ||||
] | ] | ||||
# visited the artifact url once during the single visit | # visited the artifact url once during the single visit | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
def test_cran_2_visits_same_origin(swh_config, requests_mock_datadir): | def test_cran_2_visits_same_origin(swh_storage, requests_mock_datadir): | ||||
"""Multiple visits on the same origin, only 1 archive fetch""" | """Multiple visits on the same origin, only 1 archive fetch""" | ||||
version = "2.22-6" | version = "2.22-6" | ||||
base_url = "https://cran.r-project.org" | base_url = "https://cran.r-project.org" | ||||
origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html" | origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html" | ||||
artifact_url = ( | artifact_url = ( | ||||
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa | f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa | ||||
) | ) | ||||
loader = CRANLoader( | loader = CRANLoader( | ||||
origin_url, artifacts=[{"url": artifact_url, "version": version}] | swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version}] | ||||
) | ) | ||||
# first visit | # first visit | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "920adcccc78aaeedd3cfa4459dd900d8c3431a21" | expected_snapshot_id = "920adcccc78aaeedd3cfa4459dd900d8c3431a21" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": SNAPSHOT.id.hex(), | "snapshot_id": SNAPSHOT.id.hex(), | ||||
} | } | ||||
check_snapshot(SNAPSHOT, loader.storage) | check_snapshot(SNAPSHOT, swh_storage) | ||||
assert_last_visit_matches(loader.storage, origin_url, status="full", type="cran") | assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran") | ||||
visit_stats = get_stats(loader.storage) | visit_stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 33, | "content": 33, | ||||
"directory": 7, | "directory": 7, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == visit_stats | } == visit_stats | ||||
# second visit | # second visit | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2 == { | assert actual_load_status2 == { | ||||
"status": "uneventful", | "status": "uneventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, origin_url, status="full", type="cran") | assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran") | ||||
visit_stats2 = get_stats(loader.storage) | visit_stats2 = get_stats(swh_storage) | ||||
visit_stats["origin_visit"] += 1 | visit_stats["origin_visit"] += 1 | ||||
assert visit_stats2 == visit_stats, "same stats as 1st visit, +1 visit" | assert visit_stats2 == visit_stats, "same stats as 1st visit, +1 visit" | ||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith(base_url) | if m.url.startswith(base_url) | ||||
] | ] | ||||
assert len(urls) == 1, "visited one time artifact url (across 2 visits)" | assert len(urls) == 1, "visited one time artifact url (across 2 visits)" | ||||
def test_parse_debian_control(datadir): | def test_cran_parse_debian_control(datadir): | ||||
description_file = os.path.join(datadir, "description", "acepack") | description_file = os.path.join(datadir, "description", "acepack") | ||||
actual_metadata = parse_debian_control(description_file) | actual_metadata = parse_debian_control(description_file) | ||||
assert actual_metadata == { | assert actual_metadata == { | ||||
"Package": "acepack", | "Package": "acepack", | ||||
"Maintainer": "Shawn Garbett", | "Maintainer": "Shawn Garbett", | ||||
"Version": "1.4.1", | "Version": "1.4.1", | ||||
"Author": "Phil Spector, Jerome Friedman, Robert Tibshirani...", | "Author": "Phil Spector, Jerome Friedman, Robert Tibshirani...", | ||||
"Description": "Two nonparametric methods for multiple regression...", | "Description": "Two nonparametric methods for multiple regression...", | ||||
"Title": "ACE & AVAS 4 Selecting Multiple Regression Transformations", | "Title": "ACE & AVAS 4 Selecting Multiple Regression Transformations", | ||||
"License": "MIT + file LICENSE", | "License": "MIT + file LICENSE", | ||||
"Suggests": "testthat", | "Suggests": "testthat", | ||||
"Packaged": "2016-10-28 15:38:59 UTC; garbetsp", | "Packaged": "2016-10-28 15:38:59 UTC; garbetsp", | ||||
"Repository": "CRAN", | "Repository": "CRAN", | ||||
"Date/Publication": "2016-10-29 00:11:52", | "Date/Publication": "2016-10-29 00:11:52", | ||||
"NeedsCompilation": "yes", | "NeedsCompilation": "yes", | ||||
} | } | ||||
def test_parse_debian_control_unicode_issue(datadir): | def test_cran_parse_debian_control_unicode_issue(datadir): | ||||
# iso-8859-1 caused failure, now fixed | # iso-8859-1 caused failure, now fixed | ||||
description_file = os.path.join(datadir, "description", "KnownBR") | description_file = os.path.join(datadir, "description", "KnownBR") | ||||
actual_metadata = parse_debian_control(description_file) | actual_metadata = parse_debian_control(description_file) | ||||
assert actual_metadata == { | assert actual_metadata == { | ||||
"Package": "KnowBR", | "Package": "KnowBR", | ||||
"Version": "2.0", | "Version": "2.0", | ||||
Show All 13 Lines | def test_cran_parse_debian_control_unicode_issue(datadir): | ||||
} | } | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"method_name", | "method_name", | ||||
["build_extrinsic_snapshot_metadata", "build_extrinsic_origin_metadata",], | ["build_extrinsic_snapshot_metadata", "build_extrinsic_origin_metadata",], | ||||
) | ) | ||||
def test_cran_fail_to_build_or_load_extrinsic_metadata( | def test_cran_fail_to_build_or_load_extrinsic_metadata( | ||||
method_name, swh_config, requests_mock_datadir | method_name, swh_storage, requests_mock_datadir | ||||
): | ): | ||||
"""problem during loading: {visit: failed, status: failed, no snapshot} | """problem during loading: {visit: failed, status: failed, no snapshot} | ||||
""" | """ | ||||
version = "2.22-6" | version = "2.22-6" | ||||
base_url = "https://cran.r-project.org" | base_url = "https://cran.r-project.org" | ||||
origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html" | origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html" | ||||
artifact_url = ( | artifact_url = ( | ||||
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa | f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa | ||||
) | ) | ||||
full_method_name = f"swh.loader.package.cran.loader.CRANLoader.{method_name}" | full_method_name = f"swh.loader.package.cran.loader.CRANLoader.{method_name}" | ||||
with patch( | with patch( | ||||
full_method_name, | full_method_name, | ||||
side_effect=ValueError("Fake to fail to build or load extrinsic metadata"), | side_effect=ValueError("Fake to fail to build or load extrinsic metadata"), | ||||
): | ): | ||||
loader = CRANLoader( | loader = CRANLoader( | ||||
origin_url, artifacts=[{"url": artifact_url, "version": version}] | swh_storage, | ||||
origin_url, | |||||
artifacts=[{"url": artifact_url, "version": version}], | |||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "failed", | "status": "failed", | ||||
"snapshot_id": SNAPSHOT.id.hex(), | "snapshot_id": SNAPSHOT.id.hex(), | ||||
} | } | ||||
visit_stats = get_stats(loader.storage) | visit_stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 33, | "content": 33, | ||||
"directory": 7, | "directory": 7, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == visit_stats | } == visit_stats | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, origin_url, status="partial", type="cran" | swh_storage, origin_url, status="partial", type="cran" | ||||
) | ) |