Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/packagist/lister.py
Show All 9 Lines | |||||
import iso8601 | import iso8601 | ||||
import requests | import requests | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
from ..utils import is_valid_origin_url | |||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
PackagistPageType = List[str] | PackagistPageType = List[str] | ||||
@dataclass | @dataclass | ||||
class PackagistListerState: | class PackagistListerState: | ||||
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines | def get_origins_from_page(self, page: PackagistPageType) -> Iterator[ListedOrigin]: | ||||
self.github_session.get_canonical_url(origin_url) or origin_url | self.github_session.get_canonical_url(origin_url) or origin_url | ||||
) | ) | ||||
# bitbucket closed its mercurial hosting service, those origins can not be | # bitbucket closed its mercurial hosting service, those origins can not be | ||||
# loaded into the archive anymore | # loaded into the archive anymore | ||||
if visit_type == "hg" and origin_url.startswith("https://bitbucket.org/"): | if visit_type == "hg" and origin_url.startswith("https://bitbucket.org/"): | ||||
continue | continue | ||||
if not is_valid_origin_url(origin_url): | |||||
continue | |||||
origin_urls.add(origin_url) | origin_urls.add(origin_url) | ||||
logger.debug( | logger.debug( | ||||
"Found package %s last updated on %s", package_name, last_update | "Found package %s last updated on %s", package_name, last_update | ||||
) | ) | ||||
yield ListedOrigin( | yield ListedOrigin( | ||||
lister_id=self.lister_obj.id, | lister_id=self.lister_obj.id, | ||||
url=origin_url, | url=origin_url, | ||||
visit_type=visit_type, | visit_type=visit_type, | ||||
last_update=last_update, | last_update=last_update, | ||||
) | ) | ||||
def finalize(self) -> None: | def finalize(self) -> None: | ||||
self.state.last_listing_date = self.listing_date | self.state.last_listing_date = self.listing_date | ||||
self.updated = True | self.updated = True |