Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
# flake8: noqa | # flake8: noqa | ||||
# because of long lines | # because of long lines | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import json | import json | ||||
from unittest.mock import call, Mock | |||||
import attr | |||||
from swh.model.identifiers import parse_swhid | from swh.model.identifiers import parse_swhid | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
Origin, | Origin, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
) | ) | ||||
from swh.storage import get_storage | |||||
from swh.storage.interface import PagedResult | |||||
from swh.storage.migrate_extrinsic_metadata import ( | from swh.storage.migrate_extrinsic_metadata import ( | ||||
handle_row, | handle_row, | ||||
pypi_project_from_filename, | pypi_project_from_filename, | ||||
) | ) | ||||
FETCHER = MetadataFetcher( | FETCHER = MetadataFetcher( | ||||
name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", | name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", | ||||
) | ) | ||||
PYPI_AUTHORITY = MetadataAuthority( | PYPI_AUTHORITY = MetadataAuthority( | ||||
type=MetadataAuthorityType.FORGE, url="https://pypi.org/", metadata={}, | type=MetadataAuthorityType.FORGE, url="https://pypi.org/", | ||||
) | ) | ||||
SWH_AUTHORITY = MetadataAuthority( | SWH_AUTHORITY = MetadataAuthority( | ||||
type=MetadataAuthorityType.REGISTRY, | type=MetadataAuthorityType.REGISTRY, url="https://softwareheritage.org/", | ||||
url="https://softwareheritage.org/", | |||||
metadata={}, | |||||
) | ) | ||||
def test_pypi_project_from_filename(): | def test_pypi_project_from_filename(): | ||||
files = [ | files = [ | ||||
("django-agent-trust-0.1.8.tar.gz", "django-agent-trust"), | ("django-agent-trust-0.1.8.tar.gz", "django-agent-trust"), | ||||
("python_test-1.0.1.zip", "python_test"), | ("python_test-1.0.1.zip", "python_test"), | ||||
("py-evm-0.2.0a9.tar.gz", "py-evm"), | ("py-evm-0.2.0a9.tar.gz", "py-evm"), | ||||
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | row = { | ||||
"tool": "PKG-INFO", | "tool": "PKG-INFO", | ||||
}, | }, | ||||
"original_artifact": original_artifacts, | "original_artifact": original_artifacts, | ||||
}, | }, | ||||
} | } | ||||
origin_url = "https://pypi.org/project/m3-ui/" | origin_url = "https://pypi.org/project/m3-ui/" | ||||
storage = Mock() | storage = get_storage("memory") | ||||
storage.origin_add([Origin(url=origin_url)]) | |||||
def origin_get(urls): | storage.metadata_authority_add( | ||||
assert urls == [origin_url] | [ | ||||
return [Origin(url=origin_url)] | attr.evolve(PYPI_AUTHORITY, metadata={}), | ||||
attr.evolve(SWH_AUTHORITY, metadata={}), | |||||
] | |||||
) | |||||
storage.metadata_fetcher_add([FETCHER]) | |||||
storage.origin_get.side_effect = origin_get | |||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
assert storage.method_calls == [ | revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517") | ||||
call.origin_get([origin_url]), | assert storage.raw_extrinsic_metadata_get( | ||||
call.raw_extrinsic_metadata_add( | MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, | ||||
[ | ) == PagedResult( | ||||
results=[ | |||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=revision_swhid, | ||||
"swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517" | |||||
), | |||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=PYPI_AUTHORITY, | authority=PYPI_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="pypi-project-json", | format="pypi-project-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_url, | ||||
), | ), | ||||
] | ], | ||||
), | next_page_token=None, | ||||
call.raw_extrinsic_metadata_add( | ) | ||||
[ | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, | |||||
) == PagedResult( | |||||
results=[ | |||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=revision_swhid, | ||||
"swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517" | |||||
), | |||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(original_artifacts).encode(), | metadata=json.dumps(original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_url, | ||||
), | ), | ||||
] | ], | ||||
), | next_page_token=None, | ||||
] | ) | ||||
def test_pypi_2(): | def test_pypi_2(): | ||||
"""Tests loading a revision generated by an old PyPI loader that | """Tests loading a revision generated by an old PyPI loader that | ||||
does not have a provider, but has 'project' metadata.""" | does not have a provider, but has 'project' metadata.""" | ||||
extrinsic_metadata = { | extrinsic_metadata = { | ||||
"name": "jupyterhub-simx", | "name": "jupyterhub-simx", | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | row = { | ||||
"metadata": { | "metadata": { | ||||
"project": extrinsic_metadata, | "project": extrinsic_metadata, | ||||
"original_artifact": source_original_artifacts, | "original_artifact": source_original_artifacts, | ||||
}, | }, | ||||
} | } | ||||
origin_url = "https://pypi.org/project/jupyterhub-simx/" | origin_url = "https://pypi.org/project/jupyterhub-simx/" | ||||
storage = Mock() | storage = get_storage("memory") | ||||
def origin_get(urls): | |||||
assert urls == [origin_url] | |||||
return [Origin(url=origin_url)] | |||||
storage.origin_get.side_effect = origin_get | storage.origin_add([Origin(url=origin_url)]) | ||||
storage.metadata_authority_add( | |||||
[ | |||||
attr.evolve(PYPI_AUTHORITY, metadata={}), | |||||
attr.evolve(SWH_AUTHORITY, metadata={}), | |||||
] | |||||
) | |||||
storage.metadata_fetcher_add([FETCHER]) | |||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
assert storage.method_calls == [ | revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca") | ||||
call.raw_extrinsic_metadata_add( | assert storage.raw_extrinsic_metadata_get( | ||||
[ | MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, | ||||
) == PagedResult( | |||||
results=[ | |||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=revision_swhid, | ||||
"swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca" | |||||
), | |||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=PYPI_AUTHORITY, | authority=PYPI_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="pypi-project-json", | format="pypi-project-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=None, | origin=None, | ||||
), | ), | ||||
] | ], | ||||
), | next_page_token=None, | ||||
call.raw_extrinsic_metadata_add( | ) | ||||
[ | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, | |||||
) == PagedResult( | |||||
results=[ | |||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=revision_swhid, | ||||
"swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca" | |||||
), | |||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=None, | origin=None, | ||||
), | ), | ||||
] | ], | ||||
), | next_page_token=None, | ||||
] | ) | ||||
def test_pypi_3(): | def test_pypi_3(): | ||||
"""Tests loading a revision generated by a very old PyPI loader that | """Tests loading a revision generated by a very old PyPI loader that | ||||
has neither a provider nor 'project' metadata.""" | has neither a provider nor 'project' metadata.""" | ||||
source_original_artifact = { | source_original_artifact = { | ||||
"url": "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz", | "url": "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz", | ||||
Show All 30 Lines | row = { | ||||
), | ), | ||||
"type": "tar", | "type": "tar", | ||||
"message": b"0.1.32", | "message": b"0.1.32", | ||||
"metadata": {"original_artifact": source_original_artifact}, | "metadata": {"original_artifact": source_original_artifact}, | ||||
} | } | ||||
origin_url = "https://pypi.org/project/PyPDFLite/" | origin_url = "https://pypi.org/project/PyPDFLite/" | ||||
storage = Mock() | storage = get_storage("memory") | ||||
def origin_get(urls): | |||||
assert urls == [origin_url] | |||||
return [Origin(url=origin_url)] | |||||
storage.origin_get.side_effect = origin_get | storage.origin_add([Origin(url=origin_url)]) | ||||
storage.metadata_authority_add( | |||||
[ | |||||
attr.evolve(PYPI_AUTHORITY, metadata={}), | |||||
attr.evolve(SWH_AUTHORITY, metadata={}), | |||||
] | |||||
) | |||||
storage.metadata_fetcher_add([FETCHER]) | |||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
assert storage.method_calls == [ | revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") | ||||
call.raw_extrinsic_metadata_add( | |||||
[ | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.REVISION, revision_swhid, authority=PYPI_AUTHORITY, | |||||
) == PagedResult(results=[], next_page_token=None,) | |||||
assert storage.raw_extrinsic_metadata_get( | |||||
MetadataTargetType.REVISION, revision_swhid, authority=SWH_AUTHORITY, | |||||
) == PagedResult( | |||||
results=[ | |||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=revision_swhid, | ||||
"swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2" | |||||
), | |||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, | 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=None, | origin=None, | ||||
), | ), | ||||
] | ], | ||||
), | next_page_token=None, | ||||
] | ) |