# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from dataclasses import asdict, dataclass
import logging
from typing import Any, Dict, Iterator, List, Optional

import requests

from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin

from .. import USER_AGENT
from ..pattern import CredentialsType, Lister

logger = logging.getLogger(__name__)


@dataclass
class MyListerState:
    """State of my lister"""

    last_seen_id: int = 0
    """Numeric id of the last repository listed on an incremental pass"""


class MyLister(Lister[MyListerState, List[Dict[str, Any]]]):
    """List origins from My.

    This is a template skeleton: the class constants are empty, and
    ``get_pages``, ``commit_page`` and ``finalize`` are placeholders that
    still have to be implemented for a concrete forge.
    """

    # Template placeholders, to be filled in for the concrete lister.
    LISTER_NAME = ""
    INSTANCE = ""
    API_URL = ""
    # NOTE(review): not referenced anywhere in this module yet; presumably the
    # number of entries requested per API page -- confirm when implementing
    # ``get_pages``.
    PAGE_SIZE = 1000

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: str,
        credentials: CredentialsType = None,
    ):
        """Set up the lister and its HTTP session.

        Args:
            scheduler: scheduler interface forwarded to the base ``Lister``.
            url: URL forwarded to the base ``Lister``.
            instance: instance name forwarded to the base ``Lister``.
            credentials: optional credentials forwarded to the base ``Lister``.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=url,
            instance=instance,
        )

        # A single session is kept on the instance; every request made through
        # it asks for JSON and carries the package-wide user agent.
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": USER_AGENT}
        )
        # Template placeholder: further initialisation goes here.
        ...

    def state_from_dict(self, d: Dict[str, Any]) -> MyListerState:
        """Build a ``MyListerState`` from its dict form.

        The dict keys are passed as keyword arguments, so they must match the
        dataclass field names.
        """
        return MyListerState(**d)

    def state_to_dict(self, state: MyListerState) -> Dict[str, Any]:
        """Convert a ``MyListerState`` into a plain dict of its fields."""
        return asdict(state)

    def get_pages(self) -> Iterator[List[Dict[str, Any]]]:
        """Iterate over the pages of repositories to list.

        Template placeholder: not implemented yet, currently returns ``None``.
        """
        pass

    def get_origins_from_page(
        self, page: List[Dict[str, Any]]
    ) -> Iterator[ListedOrigin]:
        """Convert a page of My repositories into a list of ListedOrigins.

        Yields one ``ListedOrigin`` per entry of ``page``, tagged with the id
        of this lister. The ``url``, ``visit_type`` and ``last_update`` values
        are template placeholders (``...``) that must be replaced with data
        taken from each repository entry.
        """
        assert self.lister_obj.id is not None

        for repo in page:
            # Template placeholder: extract the origin data from ``repo``.
            ...

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                url=...,
                visit_type=...,
                last_update=...,
            )

    def commit_page(self, page: List[Dict[str, Any]]):
        """Update the currently stored state using the latest listed page"""
        # Template placeholder: not implemented yet.
        pass

    def finalize(self):
        """Template placeholder hook: not implemented yet."""
        pass