Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/launchpad/lister.py
Show All 14 Lines | |||||
from .models import LaunchpadModel | from .models import LaunchpadModel | ||||
class LaunchpadLister(ListerBase): | class LaunchpadLister(ListerBase): | ||||
MODEL = LaunchpadModel | MODEL = LaunchpadModel | ||||
LISTER_NAME = "launchpad" | LISTER_NAME = "launchpad" | ||||
instance = "launchpad" | instance = "launchpad" | ||||
launchpad: Launchpad | |||||
flush_packet_db = 20 | flush_packet_db = 20 | ||||
def __init__(self, override_config=None): | def __init__(self, override_config=None): | ||||
super().__init__(override_config=override_config) | super().__init__(override_config=override_config) | ||||
self.launchpad = Launchpad.login_anonymously( | launchpad = Launchpad.login_anonymously( | ||||
"softwareheritage", "production", version="devel" | "softwareheritage", "production", version="devel" | ||||
) | ) | ||||
self.get_repos = launchpad.git_repositories.getRepositories | |||||
def get_model_from_repo(self, repo: Entry) -> Dict[str, Union[str, datetime]]: | def get_model_from_repo(self, repo: Entry) -> Dict[str, Union[str, datetime]]: | ||||
return { | return { | ||||
"uid": repo.unique_name, | "uid": repo.unique_name, | ||||
"name": repo.name, | "name": repo.name, | ||||
"full_name": repo.name, | "full_name": repo.name, | ||||
"origin_url": repo.git_https_url, | "origin_url": repo.git_https_url, | ||||
"html_url": repo.web_link, | "html_url": repo.web_link, | ||||
"origin_type": "git", | "origin_type": "git", | ||||
"date_last_modified": repo.date_last_modified, | "date_last_modified": repo.date_last_modified, | ||||
} | } | ||||
def lib_response_simplified( | def lib_response_simplified( | ||||
self, response: Collection | self, response: Collection | ||||
) -> List[Dict[str, Union[str, datetime]]]: | ) -> List[Dict[str, Union[str, datetime]]]: | ||||
return [ | return [ | ||||
self.get_model_from_repo(repo) for repo in response[: len(response.entries)] | self.get_model_from_repo(repo) for repo in response[: len(response.entries)] | ||||
] | ] | ||||
def get_git_repos(self, threshold: Optional[datetime]) -> Collection: | |||||
get_repos = self.launchpad.git_repositories.getRepositories | |||||
return get_repos(order_by="most neglected first", modified_since_date=threshold) | |||||
def db_last_threshold(self) -> Optional[datetime]: | def db_last_threshold(self) -> Optional[datetime]: | ||||
t = self.db_session.query(func.max(self.MODEL.date_last_modified)).first() | t = self.db_session.query(func.max(self.MODEL.date_last_modified)).first() | ||||
if t: | if t: | ||||
return t[0] | return t[0] | ||||
else: | else: | ||||
return None | return None | ||||
def ingest_data_lp( | def ingest_data_lp( | ||||
self, identifier: Optional[datetime], checks: bool = False | self, identifier: Optional[datetime], checks: bool = False | ||||
) -> Tuple[Collection, dict]: | ) -> Tuple[Collection, dict]: | ||||
""" The core data fetch sequence. Request launchpadlib endpoint. Simplify and | """ The core data fetch sequence. Request launchpadlib endpoint. Simplify and | ||||
filter response list of repositories. Inject repo information into | filter response list of repositories. Inject repo information into | ||||
local db. Queue loader tasks for linked repositories. | local db. Queue loader tasks for linked repositories. | ||||
Args: | Args: | ||||
identifier: Resource identifier. | identifier: Resource identifier. | ||||
checks: Additional checks required | checks: Additional checks required | ||||
""" | """ | ||||
response = self.get_git_repos(identifier) | response = self.get_repos( | ||||
order_by="most neglected first", modified_since_date=identifier | |||||
) | |||||
models_list = self.lib_response_simplified(response) | models_list = self.lib_response_simplified(response) | ||||
models_list = self.filter_before_inject(models_list) | models_list = self.filter_before_inject(models_list) | ||||
if checks: | if checks: | ||||
models_list = self.do_additional_checks(models_list) | models_list = self.do_additional_checks(models_list) | ||||
if not models_list: | if not models_list: | ||||
return response, {} | return response, {} | ||||
# inject into local db | # inject into local db | ||||
injected = self.inject_repo_data_into_db(models_list) | injected = self.inject_repo_data_into_db(models_list) | ||||
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines |