Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/maven/lister.py
| Show All 12 Lines | |||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||
| import lxml | import lxml | ||||
| import requests | import requests | ||||
| from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
| from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
| from ..pattern import CredentialsType, Lister | from ..pattern import CredentialsType, Lister | ||||
| from ..utils import is_valid_origin_url | |||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
| RepoPage = Dict[str, Any] | RepoPage = Dict[str, Any] | ||||
| SUPPORTED_SCM_TYPES = ("git", "svn", "hg", "cvs", "bzr") | SUPPORTED_SCM_TYPES = ("git", "svn", "hg", "cvs", "bzr") | ||||
| ▲ Show 20 Lines • Show All 260 Lines • ▼ Show 20 Lines | def get_scm(self, page: RepoPage) -> Optional[ListedOrigin]: | ||||
| else: | else: | ||||
| return None | return None | ||||
| if url and visit_type == "git": | if url and visit_type == "git": | ||||
| assert self.github_session is not None | assert self.github_session is not None | ||||
| # Non-github urls will be returned as is, github ones will be canonical ones | # Non-github urls will be returned as is, github ones will be canonical ones | ||||
| url = self.github_session.get_canonical_url(url) | url = self.github_session.get_canonical_url(url) | ||||
| if not url: | if not url or not is_valid_origin_url(url): | ||||
| return None | return None | ||||
| assert visit_type is not None | assert visit_type is not None | ||||
| assert self.lister_obj.id is not None | assert self.lister_obj.id is not None | ||||
| return ListedOrigin( | return ListedOrigin( | ||||
| lister_id=self.lister_obj.id, | lister_id=self.lister_obj.id, | ||||
| url=url, | url=url, | ||||
| visit_type=visit_type, | visit_type=visit_type, | ||||
| ▲ Show 20 Lines • Show All 98 Lines • Show Last 20 Lines | |||||