Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cpan/lister.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||||||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||||||||
from collections import defaultdict | from collections import defaultdict | ||||||||||
from datetime import datetime | from datetime import datetime | ||||||||||
import logging | import logging | ||||||||||
from typing import Any, Dict, Iterator, List, Optional, Set | from typing import Any, Dict, Iterator, List, Optional, Set, Union | ||||||||||
import iso8601 | import iso8601 | ||||||||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||||||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||||||||
from ..pattern import CredentialsType, StatelessLister | from ..pattern import CredentialsType, StatelessLister | ||||||||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||||||
# Aliasing the page results returned by `get_pages` method from the lister. | # Aliasing the page results returned by `get_pages` method from the lister. | ||||||||||
CpanListerPage = Set[str] | CpanListerPage = Set[str] | ||||||||||
def get_field_value(entry, field_name):
    """
    Splits ``field_name`` on ``.``, and use it as path in the nested
    ``entry["_source"]`` dictionary. If a value does not exist, returns None.

    When the final value is a list, its first element is returned (or None
    when the list is empty).

    >>> entry = {"_source": {"foo": 1, "bar": {"baz": 2, "qux": [3, 4]}}}
    >>> get_field_value(entry, "foo")
    1
    >>> get_field_value(entry, "bar")
    {'baz': 2, 'qux': [3, 4]}
    >>> get_field_value(entry, "bar.baz")
    2
    >>> get_field_value(entry, "bar.qux")
    3
    >>> get_field_value(entry, "bar.missing") is None
    True
    >>> get_field_value(entry, "foo.baz") is None
    True
    """
    field_value = entry["_source"]
    for field in field_name.split("."):
        # A missing or non-dict intermediate means the path does not exist.
        if not isinstance(field_value, dict):
            return None
        field_value = field_value.get(field)
    # scrolled results might have field value in a list
    if isinstance(field_value, list):
        # An empty list carries no value; avoid IndexError on [0].
        field_value = field_value[0] if field_value else None
    return field_value
def get_module_version(
    module_name: str, module_version: Union[str, float, int], release_name: str
) -> str:
    """Return the version of a module release as a string.

    When ``module_version`` equals 0 and ``release_name`` starts with
    ``"<module_name>-"``, the version is taken from the remainder of the
    release name instead. Otherwise ``module_version`` is returned as a
    string unchanged.

    Args:
        module_name: name of the module, used to build the expected prefix
            of ``release_name``.
        module_version: version value to normalise.
        release_name: release name to extract the version from; a missing
            (``None`` or empty) value is tolerated and skips the extraction.

    Returns:
        The version as a string.
    """
    # some old versions fail to be parsed and cpan api set version to 0
    if module_version == 0 and release_name:
        prefix = f"{module_name}-"
        if release_name.startswith(prefix):
            # extract version from release name; slicing strips only the
            # leading prefix, never a later occurrence of it
            module_version = release_name[len(prefix):]
    return str(module_version)
class CpanLister(StatelessLister[CpanListerPage]): | class CpanLister(StatelessLister[CpanListerPage]): | ||||||||||
"""The Cpan lister list origins from 'Cpan', the Comprehensive Perl Archive | """The Cpan lister list origins from 'Cpan', the Comprehensive Perl Archive | ||||||||||
Network.""" | Network.""" | ||||||||||
LISTER_NAME = "cpan" | LISTER_NAME = "cpan" | ||||||||||
VISIT_TYPE = "cpan" | VISIT_TYPE = "cpan" | ||||||||||
INSTANCE = "cpan" | INSTANCE = "cpan" | ||||||||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | def process_release_page(self, page: List[Dict[str, Any]]): | ||||||||||
module_download_url = get_field_value(entry, "download_url") | module_download_url = get_field_value(entry, "download_url") | ||||||||||
module_sha256_checksum = get_field_value(entry, "checksum_sha256") | module_sha256_checksum = get_field_value(entry, "checksum_sha256") | ||||||||||
module_date = get_field_value(entry, "date") | module_date = get_field_value(entry, "date") | ||||||||||
module_size = get_field_value(entry, "stat.size") | module_size = get_field_value(entry, "stat.size") | ||||||||||
module_author = get_field_value(entry, "author") | module_author = get_field_value(entry, "author") | ||||||||||
module_author_fullname = get_field_value(entry, "metadata.author") | module_author_fullname = get_field_value(entry, "metadata.author") | ||||||||||
release_name = get_field_value(entry, "name") | release_name = get_field_value(entry, "name") | ||||||||||
module_version = get_module_version( | |||||||||||
module_name, module_version, release_name | |||||||||||
) | |||||||||||
self.artifacts[module_name].append( | self.artifacts[module_name].append( | ||||||||||
{ | { | ||||||||||
"url": module_download_url, | "url": module_download_url, | ||||||||||
"filename": module_download_url.split("/")[-1], | "filename": module_download_url.split("/")[-1], | ||||||||||
"checksums": {"sha256": module_sha256_checksum}, | "checksums": {"sha256": module_sha256_checksum}, | ||||||||||
"version": module_version, | "version": module_version, | ||||||||||
"length": module_size, | "length": module_size, | ||||||||||
} | } | ||||||||||
▲ Show 20 Lines • Show All 70 Lines • Show Last 20 Lines |
or module_version = release_name.replace(prefix, "", 1), to avoid accidentally replacing more than once