Page Menu | Home | Software Heritage
Paste P918

2021 New lister template
Active | Public

Authored by tenma on Jan 11 2021, 5:39 PM.
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from dataclasses import asdict, dataclass
import logging
from typing import Any, Dict, Iterator, List, Optional
import requests
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
from ..pattern import CredentialsType, Lister
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
@dataclass
class MyListerState:
    """State of my lister.

    The single field has a default, so ``MyListerState()`` builds a fresh
    state with ``last_seen_id`` set to 0.
    """

    last_seen_id: int = 0
    """Numeric id of the last repository listed on an incremental pass"""
class MyLister(Lister[MyListerState, List[Dict[str, Any]]]):
    """List origins from My.

    This class is a template for a new lister: the empty class constants and
    the ``...`` placeholders below are meant to be filled in for the targeted
    forge.

    The lister state is a :class:`MyListerState`; a page is a list of
    dictionaries, one dictionary per repository.
    """

    # Template placeholders: fill these in for the targeted forge.
    LISTER_NAME = ""
    INSTANCE = ""
    API_URL = ""
    # NOTE(review): presumably the number of results requested per API page;
    # nothing in this template reads it yet.
    PAGE_SIZE = 1000

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: str,
        credentials: CredentialsType = None,
    ):
        """Set up the lister and the HTTP session used to query the API.

        Args:
            scheduler: scheduler interface, forwarded to the base lister
            url: forwarded to the base lister
            instance: forwarded to the base lister
            credentials: optional credentials, forwarded to the base lister
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=url,
            instance=instance,
        )

        # One shared session so every request asks for JSON and carries the
        # project's User-Agent.
        self.session = requests.Session()
        self.session.headers.update(
            {"Accept": "application/json", "User-Agent": USER_AGENT}
        )
        # Template placeholder for additional, lister-specific setup.
        ...

    def state_from_dict(self, d: Dict[str, Any]) -> MyListerState:
        """Build a :class:`MyListerState` from its dictionary form.

        The dictionary keys are passed as keyword arguments, so they must
        match the dataclass field names.
        """
        return MyListerState(**d)

    def state_to_dict(self, state: MyListerState) -> Dict[str, Any]:
        """Convert a :class:`MyListerState` into a plain dictionary."""
        return asdict(state)

    def get_pages(self) -> Iterator[List[Dict[str, Any]]]:
        """Iterate over the pages of repositories to list.

        Not implemented in this template.

        Raises:
            NotImplementedError: always, until a concrete lister provides the
                page retrieval logic. Failing explicitly avoids returning
                ``None`` from a method declared to return an iterator.
        """
        raise NotImplementedError("MyLister.get_pages must be implemented")

    def get_origins_from_page(
        self, page: List[Dict[str, Any]]
    ) -> Iterator[ListedOrigin]:
        """Convert a page of My repositories into a list of ListedOrigins.

        Yields one :class:`ListedOrigin` per entry of ``page``. The ``url``,
        ``visit_type`` and ``last_update`` values are template placeholders
        to be computed from each repository entry.
        """
        # The lister id is needed to attach each origin to this lister.
        assert self.lister_obj.id is not None

        for repo in page:
            # Template placeholder: extract the origin fields from ``repo``.
            ...

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                url=...,
                visit_type=...,
                last_update=...,
            )

    def commit_page(self, page: List[Dict[str, Any]]) -> None:
        """Update the currently stored state using the latest listed page.

        Does nothing in this template.
        """

    def finalize(self) -> None:
        """Do nothing in this template.

        NOTE(review): presumably called by the base lister once all pages
        have been processed -- confirm against the base class.
        """