Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cran/lister.py
# Copyright (C) 2019-2021 the Software Heritage developers | # Copyright (C) 2019-2021 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import json | import json | ||||
import logging | import logging | ||||
import subprocess | import subprocess | ||||
from typing import Dict, Iterator, List, Optional, Tuple | from typing import Dict, Iterator, List, Optional, Tuple | ||||
import pkg_resources | import pkg_resources | ||||
from swh.lister.pattern import StatelessLister | from swh.lister.pattern import CredentialsType, StatelessLister | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
CRAN_MIRROR = "https://cran.r-project.org" | CRAN_MIRROR = "https://cran.r-project.org" | ||||
PageType = List[Dict[str, str]] | PageType = List[Dict[str, str]] | ||||
class CRANLister(StatelessLister[PageType]): | class CRANLister(StatelessLister[PageType]): | ||||
""" | """ | ||||
List all packages hosted on The Comprehensive R Archive Network. | List all packages hosted on The Comprehensive R Archive Network. | ||||
""" | """ | ||||
LISTER_NAME = "CRAN" | LISTER_NAME = "CRAN" | ||||
def __init__( | def __init__( | ||||
self, scheduler: SchedulerInterface, | self, | ||||
scheduler: SchedulerInterface, | |||||
credentials: Optional[CredentialsType] = None, | |||||
): | ): | ||||
super().__init__(scheduler, url=CRAN_MIRROR, instance="cran") | super().__init__( | ||||
scheduler, url=CRAN_MIRROR, instance="cran", credentials=credentials | |||||
) | |||||
def get_pages(self) -> Iterator[PageType]: | def get_pages(self) -> Iterator[PageType]: | ||||
""" | """ | ||||
Yields a single page containing all CRAN packages info. | Yields a single page containing all CRAN packages info. | ||||
""" | """ | ||||
yield read_cran_data() | yield read_cran_data() | ||||
def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]: | def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]: | ||||
▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines |