Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123125
D1610.id5558.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
30 KB
Subscribers
None
D1610.id5558.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@
- `swh.lister.npm`
- `swh.lister.phabricator`
- `swh.lister.cran`
+- `swh.lister.cgit`
Dependencies
------------
@@ -203,6 +204,23 @@
cran_lister()
```
+## lister-cgit
+
+Once configured, you can execute a cgit lister using the following instructions
+in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.cgit.tasks import cgit_lister
+
+logging.basicConfig(level=logging.DEBUG)
+# simple cgit instance
+cgit_lister(url='https://git.kernel.org/')
+# cgit instance whose listed repositories differ from the base url
+cgit_lister(url='https://cgit.kde.org/',
+ url_prefix='https://anongit.kde.org/')
+```
+
Licensing
---------
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@
setuptools
xmltodict
iso8601
+beautifulsoup4
diff --git a/swh/lister/cgit/__init__.py b/swh/lister/cgit/__init__.py
new file mode 100644
diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/lister.py
@@ -0,0 +1,237 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import logging
+from bs4 import BeautifulSoup
+import requests
+from urllib.parse import urlparse
+
+from .models import CGitModel
+
+from swh.lister.core.simple_lister import SimpleLister
+from swh.lister.core.lister_transports import ListerOnePageApiTransport
+
+
+class CGitLister(ListerOnePageApiTransport, SimpleLister):
+ MODEL = CGitModel
+ LISTER_NAME = 'cgit'
+ PAGE = None
+ url_prefix_present = True
+
+ def __init__(self, url, instance=None, url_prefix=None,
+ override_config=None):
+ """Inits Class with PAGE url and origin url prefix.
+
+ Args:
+ url (str): URL of the CGit instance.
+ instance (str): Name of cgit instance.
+ url_prefix (str): Prefix of the origin_url. Origin link of the
+ repos of some special instances do not match
+ the url of the repository page, they have origin
+ url in the format <url_prefix>/<repo_name>.
+
+ """
+ self.PAGE = url
+ if url_prefix is None:
+ self.url_prefix = url
+ self.url_prefix_present = False
+ else:
+ self.url_prefix = url_prefix
+
+ if not self.url_prefix.endswith('/'):
+ self.url_prefix += '/'
+ url = urlparse(self.PAGE)
+ self.url_netloc = find_netloc(url)
+
+ if not instance:
+ instance = url.hostname
+ self.instance = instance
+
+ ListerOnePageApiTransport .__init__(self)
+ SimpleLister.__init__(self, override_config=override_config)
+
+ def list_packages(self, response):
+ """List the actual cgit instance origins from the response.
+
+ Find repositories metadata by parsing the html page (response's raw
+ content). If there are links in the html page, retrieve those
+ repositories metadata from those pages as well. Return the
+ repositories as list of dictionaries.
+
+ Args:
+ response (Response): http api request response.
+
+ Returns:
+ List of repository origin urls (as dict) included in the response.
+
+ """
+ repos_details = []
+ repos = get_repo_list(response.text)
+ url_soup = make_soup(response.text)
+ pages = self.get_pages(url_soup)
+ if len(pages) > 1:
+ repos.extend(list(self.get_repos_from_pages(pages[1:])))
+
+ for repo in repos:
+ repo_name = repo.a.text
+ origin_url = self.find_origin_url(repo, repo_name)
+
+ try:
+ time = repo.span['title']
+ except Exception:
+ time = None
+
+ if origin_url is not None:
+ repos_details.append({
+ 'name': repo_name,
+ 'time': time,
+ 'origin_url': origin_url,
+ })
+
+ random.shuffle(repos_details)
+ return repos_details
+
+ def find_origin_url(self, repo, repo_name):
+ """Finds the origin url for a repository
+
+ Args:
+ repo (Beautifulsoup): Beautifulsoup object of the repository
+ row present in base url.
+ repo_name (str): Repository name.
+
+ Returns:
+ string: origin url.
+
+ """
+ if self.url_prefix_present:
+ return self.url_prefix + repo_name
+
+ return self.get_url(repo)
+
+ def get_pages(self, url_soup):
+ """Find URL of all pages.
+
+ Finds URL of pages that are present by parsing over the HTML of
+ pagination present at the end of the page.
+
+ Args:
+ url_soup (Beautifulsoup): a beautifulsoup object of base URL
+
+ Returns:
+ list: URL of pages present for a cgit instance
+
+ """
+ pages = url_soup.find('div', {"class": "content"}).find_all('li')
+
+ if not pages:
+ return [self.PAGE]
+
+ return [self.get_url(page) for page in pages]
+
+ def get_repos_from_pages(self, pages):
+ """Find repos from all pages.
+
+ Request the available repos from the pages. This yields
+ the available repositories found as beautiful object representation.
+
+ Args:
+ pages ([str]): list of urls of all pages present for a
+ particular cgit instance.
+
+ Yields:
+ List of beautifulsoup object of repository (url) rows
+ present in pages(except first).
+
+ """
+ for page in pages:
+ response = requests.get(page)
+ if not response.ok:
+ logging.warning('Failed to retrieve repositories from page %s',
+ page)
+ continue
+
+ yield from get_repo_list(response.text)
+
+ def get_url(self, repo):
+ """Finds url of a repo page.
+
+ Finds the url of a repo page by parsing over the html of the row of
+ that repo present in the base url.
+
+ Args:
+ repo (Beautifulsoup): a beautifulsoup object of the repository
+ row present in base url.
+
+ Returns:
+ string: The url of a repo.
+
+ """
+ suffix = repo.a['href']
+ return self.url_netloc + suffix
+
+ def get_model_from_repo(self, repo):
+ """Transform from repository representation to model.
+
+ """
+ return {
+ 'uid': self.PAGE + repo['name'],
+ 'name': repo['name'],
+ 'full_name': repo['name'],
+ 'html_url': repo['origin_url'],
+ 'origin_url': repo['origin_url'],
+ 'origin_type': 'git',
+ 'time_updated': repo['time'],
+ 'instance': self.instance,
+ }
+
+ def transport_response_simplified(self, repos_details):
+ """Transform response to list for model manipulation.
+
+ """
+ return [self.get_model_from_repo(repo) for repo in repos_details]
+
+
+def find_netloc(url):
+ """Finds the scheme and network location of the url.
+
+ URLs in the repo rows are relative to the network location part of the
+ base URL, so we need to compute it to reconstruct full URLs.
+
+ Args:
+ url (urllib.parse.ParseResult): the base url, as parsed by urlparse.
+
+ Returns:
+ string: Scheme and network location part of the base URL.
+
+ Example:
+ For url = urlparse('https://git.kernel.org/pub/scm/')
+ >>> find_netloc(url)
+ 'https://git.kernel.org'
+
+ """
+ return '%s://%s' % (url.scheme, url.netloc)
+
+
+def get_repo_list(response):
+ """Find repositories (as beautifulsoup object) available within the server
+ response.
+
+ Args:
+ response (str): html text of the server response
+
+ Returns:
+ List all repositories as beautifulsoup object within the response.
+
+ """
+ repo_soup = make_soup(response)
+ return repo_soup \
+ .find('div', {"class": "content"}).find_all("tr", {"class": ""})
+
+
+def make_soup(response):
+ """Instantiates a beautiful soup object from the response's html text.
+
+ """
+ return BeautifulSoup(response, features="html.parser")
diff --git a/swh/lister/cgit/models.py b/swh/lister/cgit/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/models.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from ..core.models import ModelBase
+
+
+class CGitModel(ModelBase):
+ """a CGit repository representation
+
+ """
+ __tablename__ = 'cgit_repo'
+
+ uid = Column(String, primary_key=True)  # cgit instance url + repo name
+ time_updated = Column(String)  # 'title' of the row's age span, or None
+ instance = Column(String, index=True)  # name of the cgit instance
diff --git a/swh/lister/cgit/tasks.py b/swh/lister/cgit/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tasks.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import CGitLister
+
+
+def new_lister(url='https://git.kernel.org/',
+ url_prefix=None,
+ instance='kernal', **kw):
+ return CGitLister(url=url, instance=instance, url_prefix=url_prefix,
+ **kw)
+
+
+@app.task(name=__name__ + '.CGitListerTask')
+def cgit_lister(**lister_args):  # lister_args are forwarded to new_lister
+ lister = new_lister(**lister_args)
+ lister.run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():  # trivial task: returns 'OK' without doing any work
+ return 'OK'
diff --git a/swh/lister/cgit/tests/__init__.py b/swh/lister/cgit/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/cgit/tests/conftest.py b/swh/lister/cgit/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/cgit/tests/repo_list.txt b/swh/lister/cgit/tests/repo_list.txt
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/repo_list.txt
@@ -0,0 +1,15 @@
+<tr><td class="toplevel-repo"><a href="/openembedded-core/" title="openembedded-core">openembedded-core</a></td><td><a href="/openembedded-core/">OpenEmbedded Core layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-26 13:04:31 +0000">5 hours</span></td><td><a class="button" href="/openembedded-core/">summary</a><a class="button" href="/openembedded-core/log/">log</a><a class="button" href="/openembedded-core/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded-core-contrib/" title="openembedded-core-contrib">openembedded-core-contrib</a></td><td><a href="/openembedded-core-contrib/">OpenEmbedded Core user contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-26 12:57:22 +0000">5 hours</span></td><td><a class="button" href="/openembedded-core-contrib/">summary</a><a class="button" href="/openembedded-core-contrib/log/">log</a><a class="button" href="/openembedded-core-contrib/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-openembedded/" title="meta-openembedded">meta-openembedded</a></td><td><a href="/meta-openembedded/">Collection of OpenEmbedded layers</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-25 21:22:33 +0000">21 hours</span></td><td><a class="button" href="/meta-openembedded/">summary</a><a class="button" href="/meta-openembedded/log/">log</a><a class="button" href="/meta-openembedded/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-openembedded-contrib/" title="meta-openembedded-contrib">meta-openembedded-contrib</a></td><td><a href="/meta-openembedded-contrib/">OpenEmbedded layers collection contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-25 21:22:33 +0000">21 hours</span></td><td><a class="button" href="/meta-openembedded-contrib/">summary</a><a class="button" href="/meta-openembedded-contrib/log/">log</a><a class="button" href="/meta-openembedded-contrib/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/bitbake/" title="bitbake">bitbake</a></td><td><a href="/bitbake/">Bitbake Development tree</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-days" title="2019-06-19 17:12:23 +0000">7 days</span></td><td><a class="button" href="/bitbake/">summary</a><a class="button" href="/bitbake/log/">log</a><a class="button" href="/bitbake/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/bitbake-contrib/" title="bitbake-contrib">bitbake-contrib</a></td><td><a href="/bitbake-contrib/">Bitbake user contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-days" title="2019-06-18 15:30:38 +0000">8 days</span></td><td><a class="button" href="/bitbake-contrib/">summary</a><a class="button" href="/bitbake-contrib/log/">log</a><a class="button" href="/bitbake-contrib/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-handheld/" title="meta-handheld">meta-handheld</a></td><td><a href="/meta-handheld/">Handheld device meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-months" title="2018-10-01 21:25:11 +0000">9 months</span></td><td><a class="button" href="/meta-handheld/">summary</a><a class="button" href="/meta-handheld/log/">log</a><a class="button" href="/meta-handheld/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-opie/" title="meta-opie">meta-opie</a></td><td><a href="/meta-opie/">OPIE meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2016-06-12 03:58:09 +0000">3 years</span></td><td><a class="button" href="/meta-opie/">summary</a><a class="button" href="/meta-opie/log/">log</a><a class="button" href="/meta-opie/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded/" title="openembedded">openembedded</a></td><td><a href="/openembedded/">Classic OpenEmbedded Development Tree</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2015-05-05 08:44:03 +0000">4 years</span></td><td><a class="button" href="/openembedded/">summary</a><a class="button" href="/openembedded/log/">log</a><a class="button" href="/openembedded/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded-web-frontpages/" title="openembedded-web-frontpages">openembedded-web-frontpages</a></td><td><a href="/openembedded-web-frontpages/">OpenEmbedded Website Source Code</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2014-08-24 13:39:24 +0000">5 years</span></td><td><a class="button" href="/openembedded-web-frontpages/">summary</a><a class="button" href="/openembedded-web-frontpages/log/">log</a><a class="button" href="/openembedded-web-frontpages/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded-admin/" title="openembedded-admin">openembedded-admin</a></td><td><a href="/openembedded-admin/">OE Admin tools</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2013-10-21 21:20:18 +0000">6 years</span></td><td><a class="button" href="/openembedded-admin/">summary</a><a class="button" href="/openembedded-admin/log/">log</a><a class="button" href="/openembedded-admin/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-micro/" title="meta-micro">meta-micro</a></td><td><a href="/meta-micro/">Micro distribution meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2012-09-08 21:51:18 +0000">7 years</span></td><td><a class="button" href="/meta-micro/">summary</a><a class="button" href="/meta-micro/log/">log</a><a class="button" href="/meta-micro/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/eclipsetools/" title="eclipsetools">eclipsetools</a></td><td><a href="/eclipsetools/">Eclipse tools for OpenEmbedded</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2011-11-05 09:35:20 +0000">8 years</span></td><td><a class="button" href="/eclipsetools/">summary</a><a class="button" href="/eclipsetools/log/">log</a><a class="button" href="/eclipsetools/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/oetest/" title="oetest">oetest</a></td><td><a href="/oetest/">Test utilities for OpenEmbedded</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2009-08-14 14:10:25 +0000">10 years</span></td><td><a class="button" href="/oetest/">summary</a><a class="button" href="/oetest/log/">log</a><a class="button" href="/oetest/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/oebuildstats/" title="oebuildstats">oebuildstats</a></td><td><a href="/oebuildstats/">OE Build Stats</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td></td><td><a class="button" href="/oebuildstats/">summary</a><a class="button" href="/oebuildstats/log/">log</a><a class="button" href="/oebuildstats/tree/">tree</a></td></tr>
diff --git a/swh/lister/cgit/tests/response.html b/swh/lister/cgit/tests/response.html
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/response.html
@@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>OpenEmbedded Git Repository Browser</title>
+<meta name='generator' content='cgit v1.2'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+<link rel='shortcut icon' href='/favicon.ico'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/oe.png' alt='cgit logo'/></a></td>
+<td class='main'>OpenEmbedded Git Repository Browser</td></tr>
+<tr><td class='sub'>A web frontend for git repositories</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=owner'>Owner</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-core' href='/openembedded-core/'>openembedded-core</a></td><td><a href='/openembedded-core/'>OpenEmbedded Core layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-26 13:04:31 +0000'>5 hours</span></td><td><a class='button' href='/openembedded-core/'>summary</a><a class='button' href='/openembedded-core/log/'>log</a><a class='button' href='/openembedded-core/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-core-contrib' href='/openembedded-core-contrib/'>openembedded-core-contrib</a></td><td><a href='/openembedded-core-contrib/'>OpenEmbedded Core user contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-26 12:57:22 +0000'>5 hours</span></td><td><a class='button' href='/openembedded-core-contrib/'>summary</a><a class='button' href='/openembedded-core-contrib/log/'>log</a><a class='button' href='/openembedded-core-contrib/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-openembedded' href='/meta-openembedded/'>meta-openembedded</a></td><td><a href='/meta-openembedded/'>Collection of OpenEmbedded layers</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-25 21:22:33 +0000'>21 hours</span></td><td><a class='button' href='/meta-openembedded/'>summary</a><a class='button' href='/meta-openembedded/log/'>log</a><a class='button' href='/meta-openembedded/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-openembedded-contrib' href='/meta-openembedded-contrib/'>meta-openembedded-contrib</a></td><td><a href='/meta-openembedded-contrib/'>OpenEmbedded layers collection contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-25 21:22:33 +0000'>21 hours</span></td><td><a class='button' href='/meta-openembedded-contrib/'>summary</a><a class='button' href='/meta-openembedded-contrib/log/'>log</a><a class='button' href='/meta-openembedded-contrib/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='bitbake' href='/bitbake/'>bitbake</a></td><td><a href='/bitbake/'>Bitbake Development tree</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-days' title='2019-06-19 17:12:23 +0000'>7 days</span></td><td><a class='button' href='/bitbake/'>summary</a><a class='button' href='/bitbake/log/'>log</a><a class='button' href='/bitbake/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='bitbake-contrib' href='/bitbake-contrib/'>bitbake-contrib</a></td><td><a href='/bitbake-contrib/'>Bitbake user contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-days' title='2019-06-18 15:30:38 +0000'>8 days</span></td><td><a class='button' href='/bitbake-contrib/'>summary</a><a class='button' href='/bitbake-contrib/log/'>log</a><a class='button' href='/bitbake-contrib/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-handheld' href='/meta-handheld/'>meta-handheld</a></td><td><a href='/meta-handheld/'>Handheld device meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-months' title='2018-10-01 21:25:11 +0000'>9 months</span></td><td><a class='button' href='/meta-handheld/'>summary</a><a class='button' href='/meta-handheld/log/'>log</a><a class='button' href='/meta-handheld/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-opie' href='/meta-opie/'>meta-opie</a></td><td><a href='/meta-opie/'>OPIE meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2016-06-12 03:58:09 +0000'>3 years</span></td><td><a class='button' href='/meta-opie/'>summary</a><a class='button' href='/meta-opie/log/'>log</a><a class='button' href='/meta-opie/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded' href='/openembedded/'>openembedded</a></td><td><a href='/openembedded/'>Classic OpenEmbedded Development Tree</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2015-05-05 08:44:03 +0000'>4 years</span></td><td><a class='button' href='/openembedded/'>summary</a><a class='button' href='/openembedded/log/'>log</a><a class='button' href='/openembedded/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-web-frontpages' href='/openembedded-web-frontpages/'>openembedded-web-frontpages</a></td><td><a href='/openembedded-web-frontpages/'>OpenEmbedded Website Source Code</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2014-08-24 13:39:24 +0000'>5 years</span></td><td><a class='button' href='/openembedded-web-frontpages/'>summary</a><a class='button' href='/openembedded-web-frontpages/log/'>log</a><a class='button' href='/openembedded-web-frontpages/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-admin' href='/openembedded-admin/'>openembedded-admin</a></td><td><a href='/openembedded-admin/'>OE Admin tools</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2013-10-21 21:20:18 +0000'>6 years</span></td><td><a class='button' href='/openembedded-admin/'>summary</a><a class='button' href='/openembedded-admin/log/'>log</a><a class='button' href='/openembedded-admin/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-micro' href='/meta-micro/'>meta-micro</a></td><td><a href='/meta-micro/'>Micro distribution meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2012-09-08 21:51:18 +0000'>7 years</span></td><td><a class='button' href='/meta-micro/'>summary</a><a class='button' href='/meta-micro/log/'>log</a><a class='button' href='/meta-micro/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='eclipsetools' href='/eclipsetools/'>eclipsetools</a></td><td><a href='/eclipsetools/'>Eclipse tools for OpenEmbedded</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2011-11-05 09:35:20 +0000'>8 years</span></td><td><a class='button' href='/eclipsetools/'>summary</a><a class='button' href='/eclipsetools/log/'>log</a><a class='button' href='/eclipsetools/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='oetest' href='/oetest/'>oetest</a></td><td><a href='/oetest/'>Test utilities for OpenEmbedded</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2009-08-14 14:10:25 +0000'>10 years</span></td><td><a class='button' href='/oetest/'>summary</a><a class='button' href='/oetest/log/'>log</a><a class='button' href='/oetest/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='oebuildstats' href='/oebuildstats/'>oebuildstats</a></td><td><a href='/oebuildstats/'>OE Build Stats</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td></td><td><a class='button' href='/oebuildstats/'>summary</a><a class='button' href='/oebuildstats/log/'>log</a><a class='button' href='/oebuildstats/tree/'>tree</a></td></tr>
+</table></div> <!-- class=content -->
+<div class='footer'>generated by <a href='https://git.zx2c4.com/cgit/about/'>cgit v1.2</a> (<a href='https://git-scm.com/'>git 2.18.0</a>) at 2019-06-26 18:03:12 +0000</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from urllib.parse import urlparse
+
+from swh.lister.cgit.lister import find_netloc, get_repo_list
+
+
+def test_get_repo_list():
+ f = open('swh/lister/cgit/tests/response.html')
+ repos = get_repo_list(f.read())
+ f = open('swh/lister/cgit/tests/repo_list.txt')
+ expected_repos = f.readlines()
+ expected_repos = list(map((lambda repo: repo[:-1]), expected_repos))
+ assert len(repos) == len(expected_repos)
+ for i in range(len(repos)):
+ assert str(repos[i]) == expected_repos[i]
+
+
+def test_find_netloc():
+ first_url = urlparse('http://git.savannah.gnu.org/cgit/')  # with a path
+ second_url = urlparse('https://cgit.kde.org/')  # root path only
+
+ assert find_netloc(first_url) == 'http://git.savannah.gnu.org'
+ assert find_netloc(second_url) == 'https://cgit.kde.org'
diff --git a/swh/lister/cgit/tests/test_tasks.py b/swh/lister/cgit/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/test_tasks.py
@@ -0,0 +1,53 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+ res = swh_app.send_task(
+ 'swh.lister.cgit.tasks.ping')
+ assert res
+ res.wait()
+ assert res.successful()
+ assert res.result == 'OK'  # value returned by the ping task
+
+
+@patch('swh.lister.cgit.tasks.CGitLister')
+def test_lister_no_url_prefix(lister, swh_app, celery_session_worker):
+ # setup the mocked CGitLister: `lister` replaces the class in tasks.py
+ lister.return_value = lister  # instantiating the mock returns the mock
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.cgit.tasks.CGitListerTask',
+ kwargs=dict(url='https://git.kernel.org/', instance='kernel'))
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with(
+ url='https://git.kernel.org/',
+ url_prefix=None,
+ instance='kernel')
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()
+
+
+@patch('swh.lister.cgit.tasks.CGitLister')
+def test_lister_with_url_prefix(lister, swh_app, celery_session_worker):
+ # setup the mocked CGitLister: `lister` replaces the class in tasks.py
+ lister.return_value = lister  # instantiating the mock returns the mock
+ lister.run.return_value = None
+
+ res = swh_app.send_task(
+ 'swh.lister.cgit.tasks.CGitListerTask',
+ kwargs=dict(url='https://cgit.kde.org/',
+ url_prefix='https://anongit.kde.org/', instance='kde'))
+ assert res
+ res.wait()
+ assert res.successful()
+
+ lister.assert_called_once_with(
+ url='https://cgit.kde.org/',
+ url_prefix='https://anongit.kde.org/',
+ instance='kde')
+ lister.db_last_index.assert_not_called()
+ lister.run.assert_called_once_with()
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator', 'gnu', 'cran']
+ 'npm', 'phabricator', 'gnu', 'cran', 'cgit']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -125,6 +125,14 @@
from .cran.lister import CRANLister
_lister = CRANLister(override_config=override_conf)
+ elif lister == 'cgit':
+ from .cgit.models import ModelBase
+ from .cgit.lister import CGitLister
+ _lister = CGitLister(
+ url='http://git.savannah.gnu.org/cgit/',
+ url_prefix='http://git.savannah.gnu.org/git/',
+ override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -6,6 +6,7 @@
def celery_includes():
return [
'swh.lister.bitbucket.tasks',
+ 'swh.lister.cgit.tasks',
'swh.lister.cran.tasks',
'swh.lister.debian.tasks',
'swh.lister.github.tasks',
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 10:39 PM (2 d, 13 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234068
Attached To
D1610: swh.lister.cgit
Event Timeline
Log In to Comment