# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

# flake8: noqa
# because of long lines

"""Tests for the PyPI branch of ``migrate_extrinsic_metadata.handle_row``.

Each test feeds one revision row to ``handle_row`` against an in-memory
storage and then reads back, per authority, what was written with
``raw_extrinsic_metadata_get``.
"""

import copy
import datetime
import json

import attr

from swh.model.identifiers import parse_swhid
from swh.model.model import (
    MetadataAuthority,
    MetadataAuthorityType,
    MetadataFetcher,
    MetadataTargetType,
    Origin,
    RawExtrinsicMetadata,
)
from swh.storage import get_storage
from swh.storage.interface import PagedResult
from swh.storage.migrate_extrinsic_metadata import (
    handle_row,
    pypi_project_from_filename,
)

FETCHER = MetadataFetcher(
    name="migrate-extrinsic-metadata-from-revisions", version="0.0.1",
)
PYPI_AUTHORITY = MetadataAuthority(
    type=MetadataAuthorityType.FORGE, url="https://pypi.org/",
)
SWH_AUTHORITY = MetadataAuthority(
    type=MetadataAuthorityType.REGISTRY, url="https://softwareheritage.org/",
)


def _make_storage(origin_url: str):
    """Return an in-memory storage holding the objects the tests rely on.

    Registers the origin ``origin_url``, both authorities (with an empty
    ``metadata`` dict set through ``attr.evolve``) and ``FETCHER``.
    """
    # NOTE(review): presumably the authorities and the fetcher must exist in
    # the storage before metadata referencing them can be added -- confirm
    # against the storage implementation.
    storage = get_storage("memory")
    storage.origin_add([Origin(url=origin_url)])
    storage.metadata_authority_add(
        [
            attr.evolve(PYPI_AUTHORITY, metadata={}),
            attr.evolve(SWH_AUTHORITY, metadata={}),
        ]
    )
    storage.metadata_fetcher_add([FETCHER])
    return storage


def _revision_metadata(
    revision_swhid, discovery_date, authority, metadata_format, payload, origin
):
    """Build the ``RawExtrinsicMetadata`` expected for a revision.

    ``payload`` is JSON-serialized and encoded to bytes, ``FETCHER`` is always
    used as the fetcher, and the target type is always ``REVISION``.
    """
    return RawExtrinsicMetadata(
        type=MetadataTargetType.REVISION,
        id=revision_swhid,
        discovery_date=discovery_date,
        authority=authority,
        fetcher=FETCHER,
        format=metadata_format,
        metadata=json.dumps(payload).encode(),
        origin=origin,
    )


def _assert_stored_metadata(storage, revision_swhid, authority, expected):
    """Assert that ``storage`` returns exactly ``expected`` (a list) in a single
    page for the given revision and authority."""
    assert storage.raw_extrinsic_metadata_get(
        MetadataTargetType.REVISION, revision_swhid, authority=authority,
    ) == PagedResult(results=expected, next_page_token=None,)


def test_pypi_project_from_filename():
    """Checks the project name extracted from several archive filenames."""
    files = [
        ("django-agent-trust-0.1.8.tar.gz", "django-agent-trust"),
        ("python_test-1.0.1.zip", "python_test"),
        ("py-evm-0.2.0a9.tar.gz", "py-evm"),
        ("collective.texttospeech-1.0rc1.tar.gz", "collective.texttospeech"),
        ("flatland-fork-0.4.post1.dev40550160.zip", "flatland-fork"),
    ]

    for (filename, project) in files:
        assert pypi_project_from_filename(filename) == project


def test_pypi_1():
    """Tests loading a revision generated by a new PyPI loader that
    has a provider."""

    extrinsic_metadata = {
        "url": "https://files.pythonhosted.org/packages/70/89/a498245baf1bf3dde73d3da00b4b067a8aa7c7378ad83472078803ea3e43/m3-ui-2.2.73.tar.gz",
        "size": 3933168,
        "digests": {
            "md5": "a374ac3f655e97df5db5335e2142d344",
            "sha256": "1bc2756f7d0d2e15cf5880ca697682ff35e8b58116bf73eb9c78b3db358c5b7d",
        },
        "has_sig": False,
        "filename": "m3-ui-2.2.73.tar.gz",
        "downloads": -1,
        "md5_digest": "a374ac3f655e97df5db5335e2142d344",
        "packagetype": "sdist",
        "upload_time": "2019-11-11T06:21:20",
        "comment_text": "",
        "python_version": "source",
        "requires_python": None,
        "upload_time_iso_8601": "2019-11-11T06:21:20.073082Z",
    }

    original_artifacts = [
        {
            "length": 3933168,
            "filename": "m3-ui-2.2.73.tar.gz",
            "checksums": {
                "sha1": "9f4ec7ce64b7fea4b122e85d47ea31146c367b03",
                "sha256": "1bc2756f7d0d2e15cf5880ca697682ff35e8b58116bf73eb9c78b3db358c5b7d",
            },
        }
    ]

    row = {
        "id": b"\x00\x00\x07a{S\xe7\xb1E\x8fi]\xd0}\xe4\xceU\xaf\x15\x17",
        "date": datetime.datetime(
            2019, 11, 11, 6, 21, 20, tzinfo=datetime.timezone.utc,
        ),
        "committer_date": datetime.datetime(
            2019, 11, 11, 6, 21, 20, tzinfo=datetime.timezone.utc,
        ),
        "type": "tar",
        "message": b"2.2.73",
        "metadata": {
            "extrinsic": {
                "raw": extrinsic_metadata,
                "when": "2020-01-23T18:43:09.109407+00:00",
                "provider": "https://pypi.org/pypi/m3-ui/json",
            },
            "intrinsic": {
                "raw": {
                    "name": "m3-ui",
                    "summary": "======",
                    "version": "2.2.73",
                    # ...
                    "metadata_version": "1.1",
                },
                "tool": "PKG-INFO",
            },
            "original_artifact": original_artifacts,
        },
    }

    origin_url = "https://pypi.org/project/m3-ui/"

    storage = _make_storage(origin_url)
    deposit_cur = None
    # The row is deep-copied so the fixture dicts above remain usable for
    # building the expected values below.
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517")
    # The expected discovery date equals the "when" field of the extrinsic
    # metadata in the row.
    discovery_date = datetime.datetime(
        2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc,
    )

    _assert_stored_metadata(
        storage,
        revision_swhid,
        PYPI_AUTHORITY,
        [
            _revision_metadata(
                revision_swhid,
                discovery_date,
                PYPI_AUTHORITY,
                "pypi-project-json",
                extrinsic_metadata,
                origin_url,
            ),
        ],
    )
    _assert_stored_metadata(
        storage,
        revision_swhid,
        SWH_AUTHORITY,
        [
            _revision_metadata(
                revision_swhid,
                discovery_date,
                SWH_AUTHORITY,
                "original-artifacts-json",
                original_artifacts,
                origin_url,
            ),
        ],
    )


def test_pypi_2():
    """Tests loading a revision generated by an old PyPI loader that
    does not have a provider, but has 'project' metadata."""

    extrinsic_metadata = {
        "name": "jupyterhub-simx",
        "author": "Jupyter Development Team",
        "license": "BSD",
        "summary": "JupyterHub: A multi-user server for Jupyter notebooks",
        "version": "1.0.5",
        # ...
    }

    source_original_artifacts = [
        {
            "url": "https://files.pythonhosted.org/packages/72/28/a8098763d78e2c4607cb67602c0d726a97ac38d4c1f531aac28f49de2e1a/jupyterhub-simx-1.0.5.tar.gz",
            "date": "2019-01-23T22:10:55",
            "sha1": "ede3eadd5a06e70912e3ba7cfccef789c4ad3168",
            "size": 2346538,
            "sha256": "0399d7f5f0d90c525d369f0507ad0e8ef8729c1c7fa63aadfc46a27514d14a46",
            "filename": "jupyterhub-simx-1.0.5.tar.gz",
            "sha1_git": "734301124712182eb30fc90e97cc18cef5432f02",
            "blake2s256": "bb4aa82ffb5891a05dcf6d4dce3ad56fd2c18e9abdba9d20972910649d869322",
            "archive_type": "tar",
        }
    ]

    # Same artifact in the destination layout: "size" becomes "length", the
    # hashes move under "checksums", and "date" is not present.
    dest_original_artifacts = [
        {
            "url": "https://files.pythonhosted.org/packages/72/28/a8098763d78e2c4607cb67602c0d726a97ac38d4c1f531aac28f49de2e1a/jupyterhub-simx-1.0.5.tar.gz",
            "filename": "jupyterhub-simx-1.0.5.tar.gz",
            "archive_type": "tar",
            "length": 2346538,
            "checksums": {
                "sha1": "ede3eadd5a06e70912e3ba7cfccef789c4ad3168",
                "sha256": "0399d7f5f0d90c525d369f0507ad0e8ef8729c1c7fa63aadfc46a27514d14a46",
                "sha1_git": "734301124712182eb30fc90e97cc18cef5432f02",
                "blake2s256": "bb4aa82ffb5891a05dcf6d4dce3ad56fd2c18e9abdba9d20972910649d869322",
            },
        }
    ]

    row = {
        "id": b"\x00\x00\x04\xd68,J\xd4\xc0Q\x92fbl6U\x1f\x0eQ\xca",
        "date": datetime.datetime(
            2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc
        ),
        "committer_date": datetime.datetime(
            2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"1.0.5",
        "metadata": {
            "project": extrinsic_metadata,
            "original_artifact": source_original_artifacts,
        },
    }

    origin_url = "https://pypi.org/project/jupyterhub-simx/"

    storage = _make_storage(origin_url)
    deposit_cur = None
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca")
    # The expected discovery date equals the row's "date".
    discovery_date = datetime.datetime(
        2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc,
    )

    # The expected objects carry origin=None here, even though the origin
    # exists in the storage.
    _assert_stored_metadata(
        storage,
        revision_swhid,
        PYPI_AUTHORITY,
        [
            _revision_metadata(
                revision_swhid,
                discovery_date,
                PYPI_AUTHORITY,
                "pypi-project-json",
                extrinsic_metadata,
                None,
            ),
        ],
    )
    _assert_stored_metadata(
        storage,
        revision_swhid,
        SWH_AUTHORITY,
        [
            _revision_metadata(
                revision_swhid,
                discovery_date,
                SWH_AUTHORITY,
                "original-artifacts-json",
                dest_original_artifacts,
                None,
            ),
        ],
    )


def test_pypi_3():
    """Tests loading a revision generated by a very old PyPI loader that
    has neither a provider nor 'project' metadata."""

    # Note: a single dict here, not a list as in the other tests.
    source_original_artifact = {
        "url": "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz",
        "date": "2014-05-07T22:03:00",
        "sha1": "3289269f75b4111dd00eaea53e00330db9a1db12",
        "size": 46644,
        "sha256": "911497d655cf7ef6530c5b57773dad7da97e21cf4d608ad9ad1e38bd7bec7824",
        "filename": "PyPDFLite-0.1.32.tar.gz",
        "sha1_git": "1e5c38014731242cfa8594839bcba8a0c4e158c5",
        "blake2s256": "45792e57873f56d385c694e36c98a580cbba60d5ea91eb6fd0a2d1c71c1fb385",
        "archive_type": "tar",
    }

    dest_original_artifacts = [
        {
            "url": "https://files.pythonhosted.org/packages/34/4f/30087f22eaae8ad7077a28ce157342745a2977e264b8a8e4e7f804a8aa5e/PyPDFLite-0.1.32.tar.gz",
            "filename": "PyPDFLite-0.1.32.tar.gz",
            "archive_type": "tar",
            "length": 46644,
            "checksums": {
                "sha1": "3289269f75b4111dd00eaea53e00330db9a1db12",
                "sha256": "911497d655cf7ef6530c5b57773dad7da97e21cf4d608ad9ad1e38bd7bec7824",
                "sha1_git": "1e5c38014731242cfa8594839bcba8a0c4e158c5",
                "blake2s256": "45792e57873f56d385c694e36c98a580cbba60d5ea91eb6fd0a2d1c71c1fb385",
            },
        }
    ]

    row = {
        "id": b"N\xa9\x91|\xdfS\xcd\x13SJ\x04.N\xb3x{\x86\xc84\xd2",
        "date": datetime.datetime(2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(
            2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"0.1.32",
        "metadata": {"original_artifact": source_original_artifact},
    }

    origin_url = "https://pypi.org/project/PyPDFLite/"

    storage = _make_storage(origin_url)
    deposit_cur = None
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2")

    # The row has no PyPI-provided metadata, so nothing is expected under the
    # PyPI authority.
    _assert_stored_metadata(storage, revision_swhid, PYPI_AUTHORITY, [])
    _assert_stored_metadata(
        storage,
        revision_swhid,
        SWH_AUTHORITY,
        [
            _revision_metadata(
                revision_swhid,
                datetime.datetime(2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc,),
                SWH_AUTHORITY,
                "original-artifacts-json",
                dest_original_artifacts,
                None,
            ),
        ],
    )