Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/npm/tests/test_npm.py
Show All 11 Lines | |||||
from swh.loader.package.npm.loader import ( | from swh.loader.package.npm.loader import ( | ||||
NpmLoader, | NpmLoader, | ||||
_author_str, | _author_str, | ||||
extract_npm_package_author, | extract_npm_package_author, | ||||
) | ) | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | |||||
MetadataAuthorityType, | |||||
MetadataFetcher, | |||||
Person, | Person, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher | |||||
from swh.model.model import ObjectType as ModelObjectType | |||||
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | ||||
from swh.storage.interface import PagedResult | from swh.storage.interface import PagedResult | ||||
@pytest.fixture
def org_api_info(datadir) -> bytes:
    """Return the raw bytes of the recorded npm replicate API answer for "org".

    The file is read from ``<datadir>/https_replicate.npmjs.com/org``.
    """
    api_info_path = os.path.join(datadir, "https_replicate.npmjs.com", "org")
    with open(api_info_path, "rb") as f:
        return f.read()
▲ Show 20 Lines • Show All 237 Lines • ▼ Show 20 Lines | [ | ||||
"5ce6c1cd5cda2d546db513aaad8c72a44c7771e2", | "5ce6c1cd5cda2d546db513aaad8c72a44c7771e2", | ||||
"c337091e349b6ac10d38a49cdf8c2401ef9bb0f2", | "c337091e349b6ac10d38a49cdf8c2401ef9bb0f2", | ||||
"202fafcd7c0f8230e89d5496ad7f44ab12b807bf", | "202fafcd7c0f8230e89d5496ad7f44ab12b807bf", | ||||
"775cc516543be86c15c1dc172f49c0d4e6e78235", | "775cc516543be86c15c1dc172f49c0d4e6e78235", | ||||
"ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e", | "ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e", | ||||
] | ] | ||||
) | ) | ||||
# Releases expected after the first visit of the "org" package.  The keys are
# the release ids also listed in test_npm_loader_first_visit's `versions`.
# NOTE(review): the values look like the directory each release targets --
# confirm against the loader output.
_expected_new_releases_first_visit = normalize_hashes(
    {
        "d25e722a32c145b3eb88b416049dd35d27759a87": (
            "42753c0c2ab00c4501b552ac4671c68f3cf5aece"
        ),
        "3522e846b97c0b8434c565fe891c0f082a357e5d": (
            "3370d20d6f96dc1c9e50f083e2134881db110f4f"
        ),
        "54f6c1711c6aedb6de3cf2d6347b9f772e343784": (
            "d7895533ef5edbcffdea3f057d9fef3a1ef845ce"
        ),
    }
)
def package_url(package):
    """Return the npmjs.com page URL used as origin URL for ``package``."""
    return "https://www.npmjs.com/package/%s" % package
def package_metadata_url(package):
    """Return the replicate.npmjs.com metadata URL for ``package``.

    The returned URL ends with a trailing slash.
    """
    return "https://replicate.npmjs.com/%s/" % package
def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info):
    """Load the "org" package once and check everything written to storage.

    Checks the load status, the last visit, the snapshot (one release branch
    per version plus a HEAD alias), the presence of the expected contents,
    directories and releases, the raw extrinsic metadata attached to each
    release's target directory, and the storage object counts.
    """
    package = "org"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
    )

    # (version name, hex id of the release expected for that version)
    versions = [
        ("0.0.2", "d25e722a32c145b3eb88b416049dd35d27759a87"),
        ("0.0.3", "3522e846b97c0b8434c565fe891c0f082a357e5d"),
        ("0.0.4", "54f6c1711c6aedb6de3cf2d6347b9f772e343784"),
    ]

    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"HEAD": SnapshotBranch(
                target=b"releases/0.0.4", target_type=TargetType.ALIAS
            ),
            **{
                b"releases/"
                + version_name.encode(): SnapshotBranch(
                    target=hash_to_bytes(version_id), target_type=TargetType.RELEASE,
                )
                for (version_name, version_id) in versions
            },
        },
    )
    check_snapshot(expected_snapshot, swh_storage)

    contents = swh_storage.content_get(_expected_new_contents_first_visit)
    count = sum(0 if content is None else 1 for content in contents)
    assert count == len(_expected_new_contents_first_visit)

    assert (
        list(swh_storage.directory_missing(_expected_new_directories_first_visit)) == []
    )

    assert list(swh_storage.release_missing(_expected_new_releases_first_visit)) == []

    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE, url="https://npmjs.com/",
    )
    metadata_fetcher = MetadataFetcher(
        name="swh.loader.package.npm.loader.NpmLoader", version=__version__,
    )
    # Parse the recorded API answer once; each version's entry is the
    # metadata expected on the matching release's directory.
    versions_info = json.loads(org_api_info)["versions"]

    for (version_name, release_id) in versions:
        release = swh_storage.release_get([hash_to_bytes(release_id)])[0]
        assert release.target_type == ModelObjectType.DIRECTORY
        directory_id = release.target
        directory_swhid = ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id,
        )
        release_swhid = CoreSWHID(
            object_type=ObjectType.RELEASE, object_id=hash_to_bytes(release_id),
        )
        expected_metadata = [
            RawExtrinsicMetadata(
                target=directory_swhid,
                authority=metadata_authority,
                fetcher=metadata_fetcher,
                discovery_date=loader.visit_date,
                format="replicate-npm-package-json",
                metadata=json.dumps(versions_info[version_name]).encode(),
                origin=url,
                release=release_swhid,
            )
        ]
        assert swh_storage.raw_extrinsic_metadata_get(
            directory_swhid, metadata_authority,
        ) == PagedResult(next_page_token=None, results=expected_metadata,)

    stats = get_stats(swh_storage)

    assert {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
def test_npm_loader_incremental_visit(swh_storage, requests_mock_datadir_visits):
    """Load the "org" package twice and check the second visit is incremental.

    The first visit must produce the expected snapshot and object counts. The
    second visit must be eventful, produce a different snapshot, add only the
    new objects, and not request any registry artifact URL more than once.
    """
    package = "org"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc")
    actual_load_status = loader.load()
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
    )

    stats = get_stats(swh_storage)

    assert {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats

    # reset loader internal state
    del loader._cached_info
    del loader._cached__raw_info

    actual_load_status2 = loader.load()
    assert actual_load_status2["status"] == "eventful"
    snap_id2 = actual_load_status2["snapshot_id"]
    assert snap_id2 is not None
    assert snap_id2 != actual_load_status["snapshot_id"]

    assert_last_visit_matches(swh_storage, url, status="full", type="npm")

    stats = get_stats(swh_storage)

    assert {  # 3 new releases artifacts
        "content": len(_expected_new_contents_first_visit) + 14,
        "directory": len(_expected_new_directories_first_visit) + 15,
        "origin": 1,
        "origin_visit": 2,
        "release": len(_expected_new_releases_first_visit) + 3,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 2,
    } == stats

    urls = [
        m.url
        for m in requests_mock_datadir_visits.request_history
        if m.url.startswith("https://registry.npmjs.org")
    ]
    # each registry artifact URL was requested only once across both visits
    assert len(urls) == len(set(urls))
@pytest.mark.usefixtures("requests_mock_datadir")
def test_npm_loader_version_divergence(swh_storage):
    """Load the "@aller_shared" package and check its snapshot and stats.

    The expected snapshot has two release branches (0.1.0 and 0.1.1-alpha.14)
    and a HEAD alias pointing at ``releases/0.1.0``.
    """
    package = "@aller_shared"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("7a89bc3cb51ff1d3213b2151c745d82c3b9d69b1")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
    )

    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"HEAD": SnapshotBranch(
                target_type=TargetType.ALIAS, target=b"releases/0.1.0"
            ),
            b"releases/0.1.0": SnapshotBranch(
                target_type=TargetType.RELEASE,
                target=hash_to_bytes("103fa6d0a1abb405468e3590dcf634bcb77f67be"),
            ),
            b"releases/0.1.1-alpha.14": SnapshotBranch(
                target_type=TargetType.RELEASE,
                target=hash_to_bytes("c00b54143582a4e963e0b86e8dfa58eedd260020"),
            ),
        },
    )
    check_snapshot(expected_snapshot, swh_storage)

    stats = get_stats(swh_storage)

    assert {  # 2 releases, one per snapshot release branch
        "content": 534,
        "directory": 153,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir): | def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir): | ||||
"""Skip artifact with no intrinsic metadata during ingestion | """Skip artifact with no intrinsic metadata during ingestion | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir): | ||||
"""With no time upload, artifact is skipped | """With no time upload, artifact is skipped | ||||
""" | """ | ||||
package = "jammit-express" | package = "jammit-express" | ||||
url = package_url(package) | url = package_url(package) | ||||
loader = NpmLoader(swh_storage, url) | loader = NpmLoader(swh_storage, url) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = hash_to_bytes("d6e08e19159f77983242877c373c75222d5ae9dd") | expected_snapshot_id = hash_to_bytes("7f5e591dd3c4754abca4db1cc18355671e2c014c") | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id.hex(), | "snapshot_id": expected_snapshot_id.hex(), | ||||
} | } | ||||
# artifact is used | # artifact is used | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=expected_snapshot_id, | id=expected_snapshot_id, | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target_type=TargetType.ALIAS, target=b"releases/0.0.1" | target_type=TargetType.ALIAS, target=b"releases/0.0.1" | ||||
), | ), | ||||
b"releases/0.0.1": SnapshotBranch( | b"releases/0.0.1": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.RELEASE, | ||||
target=hash_to_bytes("9e4dd2b40d1b46b70917c0949aa2195c823a648e"), | target=hash_to_bytes("199bf0ad020617357d608655e6549e526a65dc36"), | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id | swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id | ||||
) | ) | ||||
Show All 27 Lines |