Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/pypi.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import logging | |||||
from typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple | from typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple | ||||
from urllib.parse import urlparse | from urllib.parse import urlparse | ||||
from pkginfo import UnpackedSDist | from pkginfo import UnpackedSDist | ||||
import iso8601 | import iso8601 | ||||
from swh.model.identifiers import normalize_timestamp | from swh.model.identifiers import normalize_timestamp | ||||
from swh.loader.package.loader import PackageLoader | from swh.loader.package.loader import PackageLoader | ||||
from swh.loader.package.utils import api_info, release_name | from swh.loader.package.utils import api_info, release_name | ||||
logger = logging.getLogger(__name__) | |||||
class PyPILoader(PackageLoader): | class PyPILoader(PackageLoader): | ||||
"""Load pypi origin's artifact releases into swh archive. | """Load pypi origin's artifact releases into swh archive. | ||||
""" | """ | ||||
visit_type = 'pypi' | visit_type = 'pypi' | ||||
def __init__(self, url): | def __init__(self, url): | ||||
Show All 35 Lines | def get_package_info(self, version: str) -> Generator[ | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
else: | else: | ||||
for version, p_info in res: | for version, p_info in res: | ||||
yield release_name(version, p_info['filename']), p_info | yield release_name(version, p_info['filename']), p_info | ||||
def resolve_revision_from(
        self, known_artifacts: Dict, artifact_metadata: Dict) \
        -> Optional[bytes]:
    """Resolve the revision id of an already known artifact, if any.

    Delegates to the module-level :func:`artifact_to_revision_id` rather
    than looping inline: an inline loop over
    ``known_artifact['original_artifact']`` only works when that value is
    a list of dicts, and breaks on entries that store it as a plain dict.

    Args:
        known_artifacts: mapping of revision id to the metadata recorded
            for that revision
        artifact_metadata: metadata of the artifact to look up

    Returns:
        The matching revision id, or None when no known artifact matches.

    """
    return artifact_to_revision_id(known_artifacts, artifact_metadata)
def build_revision( | def build_revision( | ||||
self, a_metadata: Dict, uncompressed_path: str) -> Dict: | self, a_metadata: Dict, uncompressed_path: str) -> Dict: | ||||
i_metadata = extract_intrinsic_metadata(uncompressed_path) | i_metadata = extract_intrinsic_metadata(uncompressed_path) | ||||
# from intrinsic metadata | # from intrinsic metadata | ||||
name = i_metadata['version'] | name = i_metadata['version'] | ||||
_author = author(i_metadata) | _author = author(i_metadata) | ||||
Show All 21 Lines | def build_revision( | ||||
'provider': self.provider_url, | 'provider': self.provider_url, | ||||
'when': self.visit_date.isoformat(), | 'when': self.visit_date.isoformat(), | ||||
'raw': a_metadata, | 'raw': a_metadata, | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
def artifact_to_revision_id(
        known_artifacts: Dict, artifact_metadata: Dict) -> Optional[bytes]:
    """Resolve the revision id associated with an artifact, if already known.

    The artifact's sha256 (``artifact_metadata['digests']['sha256']``) is
    compared with the sha256 recorded under the ``'original_artifact'`` key
    of each entry of ``known_artifacts``. Two layouts of that entry are
    supported (column metadata in 'revision'):

    - old format sample::

        {
            'original_artifact': {
                'sha256': "6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec",  # noqa
                ...
            },
            ...
        }

    - new format sample::

        {
            'original_artifact': [{
                'checksums': {
                    'sha256': "6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec",  # noqa
                    ...
                },
            }],
            ...
        }

    Args:
        known_artifacts: mapping of revision id to the metadata recorded
            for that revision (must hold an 'original_artifact' entry)
        artifact_metadata: metadata of the artifact to look up (must hold
            a sha256 under 'digests')

    Returns:
        The first revision id (in iteration order of ``known_artifacts``)
        whose recorded sha256 matches, or None when there is no match.

    """
    sha256 = artifact_metadata['digests']['sha256']
    for rev_id, known_artifact in known_artifacts.items():
        original_artifact = known_artifact['original_artifact']
        if isinstance(original_artifact, dict):
            # previous loader-pypi version stored the metadata as one dict
            if sha256 == original_artifact['sha256']:
                return rev_id
            continue
        # current loader-pypi stores the metadata as a list of dicts; these
        # are the only two layouts expected, so anything else is an
        # invariant violation rather than a case to handle
        assert isinstance(original_artifact, list), (
            "unexpected 'original_artifact' format: %s"
            % type(original_artifact).__name__)
        # iterate the value fetched above under a distinct name, so the
        # checked variable is neither shadowed nor looked up a second time
        for artifact in original_artifact:
            if sha256 == artifact['checksums']['sha256']:
                return rev_id
    return None
def pypi_api_url(url: str) -> str: | def pypi_api_url(url: str) -> str: | ||||
"""Compute api url from a project url | """Compute api url from a project url | ||||
Args: | Args: | ||||
url (str): PyPI instance's url (e.g: https://pypi.org/project/requests) | url (str): PyPI instance's url (e.g: https://pypi.org/project/requests) | ||||
This deals with correctly transforming the project's api url (e.g | This deals with correctly transforming the project's api url (e.g | ||||
https://pypi.org/pypi/requests/json) | https://pypi.org/pypi/requests/json) | ||||
▲ Show 20 Lines • Show All 80 Lines • Show Last 20 Lines |