Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
# flake8: noqa | # flake8: noqa | ||||
# because of long lines | # because of long lines | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import json | import json | ||||
import urllib.error | import urllib.error | ||||
import attr | import attr | ||||
from swh.model.identifiers import parse_swhid | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | |||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid | |||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.interface import PagedResult | from swh.storage.interface import PagedResult | ||||
from swh.storage.migrate_extrinsic_metadata import ( | from swh.storage.migrate_extrinsic_metadata import ( | ||||
handle_row, | handle_row, | ||||
pypi_origin_from_filename, | pypi_origin_from_filename, | ||||
pypi_project_from_filename, | pypi_project_from_filename, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 223 Lines • ▼ Show 20 Lines | row = { | ||||
}, | }, | ||||
"tool": "PKG-INFO", | "tool": "PKG-INFO", | ||||
}, | }, | ||||
"original_artifact": original_artifacts, | "original_artifact": original_artifacts, | ||||
}, | }, | ||||
} | } | ||||
origin_url = "https://pypi.org/project/m3-ui/" | origin_url = "https://pypi.org/project/m3-ui/" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = get_storage("memory") | storage = get_storage("memory") | ||||
storage.origin_add([Origin(url=origin_url)]) | storage.origin_add([Origin(url=origin_url)]) | ||||
storage.metadata_authority_add( | storage.metadata_authority_add( | ||||
[ | [ | ||||
attr.evolve(PYPI_AUTHORITY, metadata={}), | attr.evolve(PYPI_AUTHORITY, metadata={}), | ||||
attr.evolve(SWH_AUTHORITY, metadata={}), | attr.evolve(SWH_AUTHORITY, metadata={}), | ||||
] | ] | ||||
) | ) | ||||
storage.metadata_fetcher_add([FETCHER]) | storage.metadata_fetcher_add([FETCHER]) | ||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517") | revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517") | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | ||||
) == PagedResult( | ) == PagedResult( | ||||
results=[ | results=[ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=PYPI_AUTHORITY, | authority=PYPI_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="pypi-project-json", | format="pypi-project-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=revision_swhid, | revision=revision_swhid, | ||||
), | ), | ||||
], | ], | ||||
next_page_token=None, | next_page_token=None, | ||||
) | ) | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, | DIRECTORY_SWHID, authority=SWH_AUTHORITY, | ||||
) == PagedResult( | ) == PagedResult( | ||||
results=[ | results=[ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(original_artifacts).encode(), | metadata=json.dumps(original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=revision_swhid, | revision=revision_swhid, | ||||
), | ), | ||||
], | ], | ||||
next_page_token=None, | next_page_token=None, | ||||
) | ) | ||||
def test_pypi_2(mocker): | def test_pypi_2(mocker): | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | def test_pypi_2(mocker): | ||||
) | ) | ||||
storage.metadata_fetcher_add([FETCHER]) | storage.metadata_fetcher_add([FETCHER]) | ||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca") | revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca") | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | ||||
) == PagedResult( | ) == PagedResult( | ||||
results=[ | results=[ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=PYPI_AUTHORITY, | authority=PYPI_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="pypi-project-json", | format="pypi-project-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=None, | origin=None, | ||||
revision=revision_swhid, | revision=revision_swhid, | ||||
), | ), | ||||
], | ], | ||||
next_page_token=None, | next_page_token=None, | ||||
) | ) | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, | DIRECTORY_SWHID, authority=SWH_AUTHORITY, | ||||
) == PagedResult( | ) == PagedResult( | ||||
results=[ | results=[ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | def test_pypi_3(mocker): | ||||
) | ) | ||||
storage.metadata_fetcher_add([FETCHER]) | storage.metadata_fetcher_add([FETCHER]) | ||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") | revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | ||||
) == PagedResult(results=[], next_page_token=None,) | ) == PagedResult(results=[], next_page_token=None,) | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, | DIRECTORY_SWHID, authority=SWH_AUTHORITY, | ||||
) == PagedResult( | ) == PagedResult( | ||||
results=[ | results=[ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, | 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | row = { | ||||
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc | 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
"type": "tar", | "type": "tar", | ||||
"message": b"0.1.32", | "message": b"0.1.32", | ||||
"metadata": {"original_artifact": source_original_artifact}, | "metadata": {"original_artifact": source_original_artifact}, | ||||
} | } | ||||
origin_url = "https://pypi.org/project/PyPDFLite/" | origin_url = "https://pypi.org/project/PyPDFLite/" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = get_storage("memory") | storage = get_storage("memory") | ||||
snapshot_id = b"42" * 10 | snapshot_id = b"42" * 10 | ||||
storage.origin_add([Origin(url=origin_url)]) | storage.origin_add([Origin(url=origin_url)]) | ||||
storage.origin_visit_add( | storage.origin_visit_add( | ||||
[OriginVisit(origin=origin_url, visit=1, date=now(), type="pypi")] | [OriginVisit(origin=origin_url, visit=1, date=now(), type="pypi")] | ||||
) | ) | ||||
Show All 28 Lines | def test_pypi_good_origin(): | ||||
) | ) | ||||
storage.metadata_fetcher_add([FETCHER]) | storage.metadata_fetcher_add([FETCHER]) | ||||
deposit_cur = None | deposit_cur = None | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") | revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | DIRECTORY_SWHID, authority=PYPI_AUTHORITY, | ||||
) == PagedResult(results=[], next_page_token=None,) | ) == PagedResult(results=[], next_page_token=None,) | ||||
assert storage.raw_extrinsic_metadata_get( | assert storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, | DIRECTORY_SWHID, authority=SWH_AUTHORITY, | ||||
) == PagedResult( | ) == PagedResult( | ||||
results=[ | results=[ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, | 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=revision_swhid, | revision=revision_swhid, | ||||
), | ), | ||||
], | ], | ||||
next_page_token=None, | next_page_token=None, | ||||
) | ) |