Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cgit/lister.py
# Copyright (C) 2019 the Software Heritage developers | # Copyright (C) 2019 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import logging | import logging | ||||
from urllib.parse import urlparse, urljoin | from urllib.parse import urlparse, urljoin | ||||
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||
from requests import Session | from requests import Session | ||||
from requests.adapters import HTTPAdapter | from requests.adapters import HTTPAdapter | ||||
from .models import CGitModel | from .models import CGitModel | ||||
from swh.core.utils import grouper | from swh.core.utils import grouper | ||||
from swh.lister import USER_AGENT | |||||
from swh.lister.core.lister_base import ListerBase | from swh.lister.core.lister_base import ListerBase | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class CGitLister(ListerBase): | class CGitLister(ListerBase): | ||||
"""Lister class for CGit repositories. | """Lister class for CGit repositories. | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | def __init__(self, url=None, instance=None, override_config=None): | ||||
url = self.config.get('url', self.DEFAULT_URL) | url = self.config.get('url', self.DEFAULT_URL) | ||||
self.url = url | self.url = url | ||||
if not instance: | if not instance: | ||||
instance = urlparse(url).hostname | instance = urlparse(url).hostname | ||||
self.instance = instance | self.instance = instance | ||||
self.session = Session() | self.session = Session() | ||||
self.session.mount(self.url, HTTPAdapter(max_retries=3)) | self.session.mount(self.url, HTTPAdapter(max_retries=3)) | ||||
self.session.headers = { | |||||
'User-Agent': USER_AGENT, | |||||
} | |||||
def run(self): | def run(self): | ||||
total = 0 | total = 0 | ||||
for repos in grouper(self.get_repos(), 10): | for repos in grouper(self.get_repos(), 10): | ||||
models = list(filter(None, (self.build_model(repo) | models = list(filter(None, (self.build_model(repo) | ||||
for repo in repos))) | for repo in repos))) | ||||
injected_repos = self.inject_repo_data_into_db(models) | injected_repos = self.inject_repo_data_into_db(models) | ||||
self.schedule_missing_tasks(models, injected_repos) | self.schedule_missing_tasks(models, injected_repos) | ||||
▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines |