Page MenuHomeSoftware Heritage

D1610.id5558.diff
No OneTemporary

D1610.id5558.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@
- `swh.lister.npm`
- `swh.lister.phabricator`
- `swh.lister.cran`
+- `swh.lister.cgit`
Dependencies
------------
@@ -203,6 +204,23 @@
cran_lister()
```
+## lister-cgit
+
+Once configured, you can execute a cgit lister using the following instructions
+in a `python3` script:
+
+```lang=python
+import logging
+from swh.lister.cgit.tasks import cgit_lister
+
+logging.basicConfig(level=logging.DEBUG)
+# simple cgit instance
+cgit_lister(url='https://git.kernel.org/')
+# cgit instance whose repository origin urls use a different prefix than the base url
+cgit_lister(url='https://cgit.kde.org/',
+ url_prefix='https://anongit.kde.org/')
+```
+
Licensing
---------
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@
setuptools
xmltodict
iso8601
+beautifulsoup4
diff --git a/swh/lister/cgit/__init__.py b/swh/lister/cgit/__init__.py
new file mode 100644
diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/lister.py
@@ -0,0 +1,237 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import logging
+from bs4 import BeautifulSoup
+import requests
+from urllib.parse import urlparse
+
+from .models import CGitModel
+
+from swh.lister.core.simple_lister import SimpleLister
+from swh.lister.core.lister_transports import ListerOnePageApiTransport
+
+
+class CGitLister(ListerOnePageApiTransport, SimpleLister):
+    """Lister for the git repositories exposed by a cgit web instance.
+
+    The html of the instance index page (``PAGE``), and of every pagination
+    page it links to, is parsed to extract one origin per repository row.
+
+    """
+    MODEL = CGitModel
+    LISTER_NAME = 'cgit'
+    PAGE = None
+    # Set to False on the instance by __init__ when no url_prefix is given;
+    # origin urls are then taken from the links found in the page.
+    url_prefix_present = True
+
+    def __init__(self, url, instance=None, url_prefix=None,
+                 override_config=None):
+        """Inits Class with PAGE url and origin url prefix.
+
+        Args:
+            url (str): URL of the CGit instance.
+            instance (str): Name of cgit instance. Falls back to the
+                            hostname of `url` when empty.
+            url_prefix (str): Prefix of the origin_url. Origin link of the
+                              repos of some special instances do not match
+                              the url of the repository page, they have
+                              origin url in the format
+                              <url_prefix>/<repo_name>.
+            override_config: Forwarded as is to SimpleLister.__init__.
+
+        """
+        self.PAGE = url
+        if url_prefix is None:
+            self.url_prefix = url
+            self.url_prefix_present = False
+        else:
+            self.url_prefix = url_prefix
+
+        # find_origin_url appends the bare repository name to the prefix
+        if not self.url_prefix.endswith('/'):
+            self.url_prefix += '/'
+
+        parsed_url = urlparse(self.PAGE)
+        self.url_netloc = find_netloc(parsed_url)
+
+        if not instance:
+            instance = parsed_url.hostname
+        self.instance = instance
+
+        ListerOnePageApiTransport.__init__(self)
+        SimpleLister.__init__(self, override_config=override_config)
+
+    def list_packages(self, response):
+        """List the actual cgit instance origins from the response.
+
+        Find repositories metadata by parsing the html page (response's raw
+        content). If there are links in the html page, retrieve those
+        repositories metadata from those pages as well. Return the
+        repositories as list of dictionaries.
+
+        Args:
+            response (Response): http api request response.
+
+        Returns:
+            List of dicts with keys 'name', 'time' and 'origin_url', one
+            per repository, in random order.
+
+        """
+        repos_details = []
+        repos = get_repo_list(response.text)
+        url_soup = make_soup(response.text)
+        pages = self.get_pages(url_soup)
+        if len(pages) > 1:
+            # the first page is the response already parsed above
+            repos.extend(self.get_repos_from_pages(pages[1:]))
+
+        for repo in repos:
+            repo_name = repo.a.text
+            origin_url = self.find_origin_url(repo, repo_name)
+
+            # The last update time is the `title` of the row's age <span>;
+            # some rows have no span at all.
+            span = repo.span
+            time = span.get('title') if span is not None else None
+
+            if origin_url is not None:
+                repos_details.append({
+                    'name': repo_name,
+                    'time': time,
+                    'origin_url': origin_url,
+                })
+
+        random.shuffle(repos_details)
+        return repos_details
+
+    def find_origin_url(self, repo, repo_name):
+        """Finds the origin url for a repository
+
+        Args:
+            repo (Beautifulsoup): Beautifulsoup object of the repository
+                                  row present in base url.
+            repo_name (str): Repository name.
+
+        Returns:
+            string: <url_prefix><repo_name> when a url prefix was given to
+            the constructor, else the url of the repository page.
+
+        """
+        if self.url_prefix_present:
+            return self.url_prefix + repo_name
+
+        return self.get_url(repo)
+
+    def get_pages(self, url_soup):
+        """Find URL of all pages.
+
+        Finds URL of pages that are present by parsing over the HTML of
+        pagination present at the end of the page.
+
+        Args:
+            url_soup (Beautifulsoup): a beautifulsoup object of base URL
+
+        Returns:
+            list: URL of pages present for a cgit instance; only the base
+            URL when the page holds no pagination entry.
+
+        """
+        content = url_soup.find('div', {"class": "content"})
+        pages = content.find_all('li') if content is not None else []
+
+        if not pages:
+            return [self.PAGE]
+
+        return [self.get_url(page) for page in pages]
+
+    def get_repos_from_pages(self, pages):
+        """Find repos from all pages.
+
+        Request the available repos from the pages. Pages answering with
+        an error status are logged and skipped.
+
+        Args:
+            pages ([str]): list of urls of all pages present for a
+                           particular cgit instance.
+
+        Yields:
+            beautifulsoup object of each repository row found in the
+            pages, one row at a time.
+
+        """
+        logger = logging.getLogger(__name__)
+        for page in pages:
+            response = requests.get(page)
+            if not response.ok:
+                logger.warning(
+                    'Failed to retrieve repositories from page %s', page)
+                continue
+
+            yield from get_repo_list(response.text)
+
+    def get_url(self, repo):
+        """Finds url of a repo page.
+
+        Resolves the href of the first link of the row against the base
+        URL, so root-relative, relative and absolute hrefs all give a
+        well-formed url.
+
+        Args:
+            repo (Beautifulsoup): a beautifulsoup object of the repository
+                                  row present in base url.
+
+        Returns:
+            string: The url of a repo.
+
+        """
+        from urllib.parse import urljoin
+
+        return urljoin(self.PAGE, repo.a['href'])
+
+    def get_model_from_repo(self, repo):
+        """Transform from repository representation to model.
+
+        Args:
+            repo (dict): one entry of the list built by list_packages.
+
+        Returns:
+            dict of model fields; the uid is the base URL followed by the
+            repository name, html_url and origin_url are both the origin
+            url.
+
+        """
+        return {
+            'uid': self.PAGE + repo['name'],
+            'name': repo['name'],
+            'full_name': repo['name'],
+            'html_url': repo['origin_url'],
+            'origin_url': repo['origin_url'],
+            'origin_type': 'git',
+            'time_updated': repo['time'],
+            'instance': self.instance,
+        }
+
+    def transport_response_simplified(self, repos_details):
+        """Transform response to list for model manipulation.
+
+        """
+        return [self.get_model_from_repo(repo) for repo in repos_details]
+
+
+def find_netloc(url):
+    """Build the ``scheme://netloc`` root of a parsed url.
+
+    Links found in a cgit page are relative to that root, so it is needed
+    to turn them back into full URLs.
+
+    Args:
+        url: parsed url exposing ``scheme`` and ``netloc`` attributes, as
+             returned by urllib.parse.urlparse.
+
+    Returns:
+        string: Scheme and network location part of the url.
+
+    Example:
+        >>> find_netloc(urlparse('https://git.kernel.org/pub/scm/'))
+        'https://git.kernel.org'
+
+    """
+    return '{}://{}'.format(url.scheme, url.netloc)
+
+
+def get_repo_list(response):
+    """Find repositories (as beautifulsoup object) available within the
+    html of a cgit page.
+
+    Args:
+        response (str): html text of the page. Callers pass the text of
+                        the server response, not the response object.
+
+    Returns:
+        List of the ``<tr>`` rows of the ``<div class="content">`` that
+        match the empty class filter, as beautifulsoup objects. Empty list
+        when the page has no such div.
+
+    """
+    content = make_soup(response).find('div', {"class": "content"})
+    if content is None:
+        # not a repository listing page: nothing to list
+        return []
+    return content.find_all("tr", {"class": ""})
+
+
+def make_soup(response):
+    """Parse html text into a beautiful soup object.
+
+    `response` is handed to BeautifulSoup as is; callers in this module
+    pass the text of a server response. The "html.parser" backend is used.
+
+    """
+    soup = BeautifulSoup(response, features="html.parser")
+    return soup
diff --git a/swh/lister/cgit/models.py b/swh/lister/cgit/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/models.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from ..core.models import ModelBase
+
+
+class CGitModel(ModelBase):
+ """a CGit repository representation
+
+ Declares the columns below on top of whatever ModelBase provides.
+
+ """
+ __tablename__ = 'cgit_repo'
+
+ # Primary key. CGitLister.get_model_from_repo fills it with the url of
+ # the cgit instance followed by the repository name.
+ uid = Column(String, primary_key=True)
+ # `title` text of the age <span> of the repository row, or None when
+ # the row has none; stored as scraped, not parsed into a date.
+ time_updated = Column(String)
+ # Name of the cgit instance the repository was listed from (indexed).
+ instance = Column(String, index=True)
diff --git a/swh/lister/cgit/tasks.py b/swh/lister/cgit/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tasks.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import CGitLister
+
+
+def new_lister(url='https://git.kernel.org/',
+               url_prefix=None,
+               instance='kernel', **kw):
+    """Instantiate a CGitLister, by default for https://git.kernel.org/.
+
+    Args:
+        url (str): URL of the cgit instance.
+        url_prefix (str): prefix of the origin urls, None to derive them
+                          from the links of the listing page.
+        instance (str): name of the cgit instance.
+        **kw: extra keyword arguments forwarded to CGitLister.
+
+    Returns:
+        the CGitLister instance.
+
+    """
+    return CGitLister(url=url, instance=instance, url_prefix=url_prefix,
+                      **kw)
+
+
+@app.task(name=__name__ + '.CGitListerTask')
+def cgit_lister(**lister_args):
+    """Task building a cgit lister out of `lister_args` and running it.
+
+    `lister_args` are the keyword arguments of new_lister.
+
+    """
+    new_lister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+ """Trivial task returning the string 'OK'."""
+ return 'OK'
diff --git a/swh/lister/cgit/tests/__init__.py b/swh/lister/cgit/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/cgit/tests/conftest.py b/swh/lister/cgit/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/conftest.py
@@ -0,0 +1 @@
+from swh.lister.core.tests.conftest import * # noqa
diff --git a/swh/lister/cgit/tests/repo_list.txt b/swh/lister/cgit/tests/repo_list.txt
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/repo_list.txt
@@ -0,0 +1,15 @@
+<tr><td class="toplevel-repo"><a href="/openembedded-core/" title="openembedded-core">openembedded-core</a></td><td><a href="/openembedded-core/">OpenEmbedded Core layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-26 13:04:31 +0000">5 hours</span></td><td><a class="button" href="/openembedded-core/">summary</a><a class="button" href="/openembedded-core/log/">log</a><a class="button" href="/openembedded-core/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded-core-contrib/" title="openembedded-core-contrib">openembedded-core-contrib</a></td><td><a href="/openembedded-core-contrib/">OpenEmbedded Core user contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-26 12:57:22 +0000">5 hours</span></td><td><a class="button" href="/openembedded-core-contrib/">summary</a><a class="button" href="/openembedded-core-contrib/log/">log</a><a class="button" href="/openembedded-core-contrib/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-openembedded/" title="meta-openembedded">meta-openembedded</a></td><td><a href="/meta-openembedded/">Collection of OpenEmbedded layers</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-25 21:22:33 +0000">21 hours</span></td><td><a class="button" href="/meta-openembedded/">summary</a><a class="button" href="/meta-openembedded/log/">log</a><a class="button" href="/meta-openembedded/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-openembedded-contrib/" title="meta-openembedded-contrib">meta-openembedded-contrib</a></td><td><a href="/meta-openembedded-contrib/">OpenEmbedded layers collection contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-25 21:22:33 +0000">21 hours</span></td><td><a class="button" href="/meta-openembedded-contrib/">summary</a><a class="button" href="/meta-openembedded-contrib/log/">log</a><a class="button" href="/meta-openembedded-contrib/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/bitbake/" title="bitbake">bitbake</a></td><td><a href="/bitbake/">Bitbake Development tree</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-days" title="2019-06-19 17:12:23 +0000">7 days</span></td><td><a class="button" href="/bitbake/">summary</a><a class="button" href="/bitbake/log/">log</a><a class="button" href="/bitbake/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/bitbake-contrib/" title="bitbake-contrib">bitbake-contrib</a></td><td><a href="/bitbake-contrib/">Bitbake user contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-days" title="2019-06-18 15:30:38 +0000">8 days</span></td><td><a class="button" href="/bitbake-contrib/">summary</a><a class="button" href="/bitbake-contrib/log/">log</a><a class="button" href="/bitbake-contrib/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-handheld/" title="meta-handheld">meta-handheld</a></td><td><a href="/meta-handheld/">Handheld device meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-months" title="2018-10-01 21:25:11 +0000">9 months</span></td><td><a class="button" href="/meta-handheld/">summary</a><a class="button" href="/meta-handheld/log/">log</a><a class="button" href="/meta-handheld/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-opie/" title="meta-opie">meta-opie</a></td><td><a href="/meta-opie/">OPIE meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2016-06-12 03:58:09 +0000">3 years</span></td><td><a class="button" href="/meta-opie/">summary</a><a class="button" href="/meta-opie/log/">log</a><a class="button" href="/meta-opie/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded/" title="openembedded">openembedded</a></td><td><a href="/openembedded/">Classic OpenEmbedded Development Tree</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2015-05-05 08:44:03 +0000">4 years</span></td><td><a class="button" href="/openembedded/">summary</a><a class="button" href="/openembedded/log/">log</a><a class="button" href="/openembedded/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded-web-frontpages/" title="openembedded-web-frontpages">openembedded-web-frontpages</a></td><td><a href="/openembedded-web-frontpages/">OpenEmbedded Website Source Code</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2014-08-24 13:39:24 +0000">5 years</span></td><td><a class="button" href="/openembedded-web-frontpages/">summary</a><a class="button" href="/openembedded-web-frontpages/log/">log</a><a class="button" href="/openembedded-web-frontpages/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/openembedded-admin/" title="openembedded-admin">openembedded-admin</a></td><td><a href="/openembedded-admin/">OE Admin tools</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2013-10-21 21:20:18 +0000">6 years</span></td><td><a class="button" href="/openembedded-admin/">summary</a><a class="button" href="/openembedded-admin/log/">log</a><a class="button" href="/openembedded-admin/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/meta-micro/" title="meta-micro">meta-micro</a></td><td><a href="/meta-micro/">Micro distribution meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2012-09-08 21:51:18 +0000">7 years</span></td><td><a class="button" href="/meta-micro/">summary</a><a class="button" href="/meta-micro/log/">log</a><a class="button" href="/meta-micro/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/eclipsetools/" title="eclipsetools">eclipsetools</a></td><td><a href="/eclipsetools/">Eclipse tools for OpenEmbedded</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2011-11-05 09:35:20 +0000">8 years</span></td><td><a class="button" href="/eclipsetools/">summary</a><a class="button" href="/eclipsetools/log/">log</a><a class="button" href="/eclipsetools/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/oetest/" title="oetest">oetest</a></td><td><a href="/oetest/">Test utilities for OpenEmbedded</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2009-08-14 14:10:25 +0000">10 years</span></td><td><a class="button" href="/oetest/">summary</a><a class="button" href="/oetest/log/">log</a><a class="button" href="/oetest/tree/">tree</a></td></tr>
+<tr><td class="toplevel-repo"><a href="/oebuildstats/" title="oebuildstats">oebuildstats</a></td><td><a href="/oebuildstats/">OE Build Stats</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td></td><td><a class="button" href="/oebuildstats/">summary</a><a class="button" href="/oebuildstats/log/">log</a><a class="button" href="/oebuildstats/tree/">tree</a></td></tr>
diff --git a/swh/lister/cgit/tests/response.html b/swh/lister/cgit/tests/response.html
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/response.html
@@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>OpenEmbedded Git Repository Browser</title>
+<meta name='generator' content='cgit v1.2'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+<link rel='shortcut icon' href='/favicon.ico'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/oe.png' alt='cgit logo'/></a></td>
+<td class='main'>OpenEmbedded Git Repository Browser</td></tr>
+<tr><td class='sub'>A web frontend for git repositories</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=owner'>Owner</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-core' href='/openembedded-core/'>openembedded-core</a></td><td><a href='/openembedded-core/'>OpenEmbedded Core layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-26 13:04:31 +0000'>5 hours</span></td><td><a class='button' href='/openembedded-core/'>summary</a><a class='button' href='/openembedded-core/log/'>log</a><a class='button' href='/openembedded-core/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-core-contrib' href='/openembedded-core-contrib/'>openembedded-core-contrib</a></td><td><a href='/openembedded-core-contrib/'>OpenEmbedded Core user contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-26 12:57:22 +0000'>5 hours</span></td><td><a class='button' href='/openembedded-core-contrib/'>summary</a><a class='button' href='/openembedded-core-contrib/log/'>log</a><a class='button' href='/openembedded-core-contrib/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-openembedded' href='/meta-openembedded/'>meta-openembedded</a></td><td><a href='/meta-openembedded/'>Collection of OpenEmbedded layers</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-25 21:22:33 +0000'>21 hours</span></td><td><a class='button' href='/meta-openembedded/'>summary</a><a class='button' href='/meta-openembedded/log/'>log</a><a class='button' href='/meta-openembedded/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-openembedded-contrib' href='/meta-openembedded-contrib/'>meta-openembedded-contrib</a></td><td><a href='/meta-openembedded-contrib/'>OpenEmbedded layers collection contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-25 21:22:33 +0000'>21 hours</span></td><td><a class='button' href='/meta-openembedded-contrib/'>summary</a><a class='button' href='/meta-openembedded-contrib/log/'>log</a><a class='button' href='/meta-openembedded-contrib/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='bitbake' href='/bitbake/'>bitbake</a></td><td><a href='/bitbake/'>Bitbake Development tree</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-days' title='2019-06-19 17:12:23 +0000'>7 days</span></td><td><a class='button' href='/bitbake/'>summary</a><a class='button' href='/bitbake/log/'>log</a><a class='button' href='/bitbake/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='bitbake-contrib' href='/bitbake-contrib/'>bitbake-contrib</a></td><td><a href='/bitbake-contrib/'>Bitbake user contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-days' title='2019-06-18 15:30:38 +0000'>8 days</span></td><td><a class='button' href='/bitbake-contrib/'>summary</a><a class='button' href='/bitbake-contrib/log/'>log</a><a class='button' href='/bitbake-contrib/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-handheld' href='/meta-handheld/'>meta-handheld</a></td><td><a href='/meta-handheld/'>Handheld device meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-months' title='2018-10-01 21:25:11 +0000'>9 months</span></td><td><a class='button' href='/meta-handheld/'>summary</a><a class='button' href='/meta-handheld/log/'>log</a><a class='button' href='/meta-handheld/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-opie' href='/meta-opie/'>meta-opie</a></td><td><a href='/meta-opie/'>OPIE meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2016-06-12 03:58:09 +0000'>3 years</span></td><td><a class='button' href='/meta-opie/'>summary</a><a class='button' href='/meta-opie/log/'>log</a><a class='button' href='/meta-opie/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded' href='/openembedded/'>openembedded</a></td><td><a href='/openembedded/'>Classic OpenEmbedded Development Tree</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2015-05-05 08:44:03 +0000'>4 years</span></td><td><a class='button' href='/openembedded/'>summary</a><a class='button' href='/openembedded/log/'>log</a><a class='button' href='/openembedded/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-web-frontpages' href='/openembedded-web-frontpages/'>openembedded-web-frontpages</a></td><td><a href='/openembedded-web-frontpages/'>OpenEmbedded Website Source Code</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2014-08-24 13:39:24 +0000'>5 years</span></td><td><a class='button' href='/openembedded-web-frontpages/'>summary</a><a class='button' href='/openembedded-web-frontpages/log/'>log</a><a class='button' href='/openembedded-web-frontpages/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='openembedded-admin' href='/openembedded-admin/'>openembedded-admin</a></td><td><a href='/openembedded-admin/'>OE Admin tools</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2013-10-21 21:20:18 +0000'>6 years</span></td><td><a class='button' href='/openembedded-admin/'>summary</a><a class='button' href='/openembedded-admin/log/'>log</a><a class='button' href='/openembedded-admin/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='meta-micro' href='/meta-micro/'>meta-micro</a></td><td><a href='/meta-micro/'>Micro distribution meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2012-09-08 21:51:18 +0000'>7 years</span></td><td><a class='button' href='/meta-micro/'>summary</a><a class='button' href='/meta-micro/log/'>log</a><a class='button' href='/meta-micro/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='eclipsetools' href='/eclipsetools/'>eclipsetools</a></td><td><a href='/eclipsetools/'>Eclipse tools for OpenEmbedded</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2011-11-05 09:35:20 +0000'>8 years</span></td><td><a class='button' href='/eclipsetools/'>summary</a><a class='button' href='/eclipsetools/log/'>log</a><a class='button' href='/eclipsetools/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='oetest' href='/oetest/'>oetest</a></td><td><a href='/oetest/'>Test utilities for OpenEmbedded</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2009-08-14 14:10:25 +0000'>10 years</span></td><td><a class='button' href='/oetest/'>summary</a><a class='button' href='/oetest/log/'>log</a><a class='button' href='/oetest/tree/'>tree</a></td></tr>
+<tr><td class='toplevel-repo'><a title='oebuildstats' href='/oebuildstats/'>oebuildstats</a></td><td><a href='/oebuildstats/'>OE Build Stats</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td></td><td><a class='button' href='/oebuildstats/'>summary</a><a class='button' href='/oebuildstats/log/'>log</a><a class='button' href='/oebuildstats/tree/'>tree</a></td></tr>
+</table></div> <!-- class=content -->
+<div class='footer'>generated by <a href='https://git.zx2c4.com/cgit/about/'>cgit v1.2</a> (<a href='https://git-scm.com/'>git 2.18.0</a>) at 2019-06-26 18:03:12 +0000</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from urllib.parse import urlparse
+
+from swh.lister.cgit.lister import find_netloc, get_repo_list
+
+
+def test_get_repo_list():
+    """get_repo_list finds every repository row of the sample page.
+
+    The fixture paths are relative to the current working directory.
+
+    """
+    with open('swh/lister/cgit/tests/response.html') as f:
+        repos = get_repo_list(f.read())
+    with open('swh/lister/cgit/tests/repo_list.txt') as f:
+        # strip the line terminator only, so that a last line without a
+        # trailing newline keeps its final character
+        expected_repos = [line.rstrip('\n') for line in f]
+
+    assert len(repos) == len(expected_repos)
+    for repo, expected_repo in zip(repos, expected_repos):
+        assert str(repo) == expected_repo
+
+
+def test_find_netloc():
+    """find_netloc keeps the scheme and host of a url and drops its path."""
+    expected_netlocs = (
+        ('http://git.savannah.gnu.org/cgit/', 'http://git.savannah.gnu.org'),
+        ('https://cgit.kde.org/', 'https://cgit.kde.org'),
+    )
+    for url, netloc in expected_netlocs:
+        assert find_netloc(urlparse(url)) == netloc
diff --git a/swh/lister/cgit/tests/test_tasks.py b/swh/lister/cgit/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/test_tasks.py
@@ -0,0 +1,53 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+    """The cgit ping task runs on the worker and answers 'OK'."""
+    task = swh_app.send_task('swh.lister.cgit.tasks.ping')
+    assert task
+    task.wait()
+    assert task.successful()
+    assert task.result == 'OK'
+
+
+@patch('swh.lister.cgit.tasks.CGitLister')
+def test_lister_no_url_prefix(lister, swh_app, celery_session_worker):
+    """The lister task instantiates and runs a lister without url prefix."""
+    # the mocked class returns itself, so calls made on the lister
+    # instance are recorded on the same mock
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    task_kwargs = {'url': 'https://git.kernel.org/', 'instance': 'kernel'}
+    task = swh_app.send_task(
+        'swh.lister.cgit.tasks.CGitListerTask', kwargs=task_kwargs)
+    assert task
+    task.wait()
+    assert task.successful()
+
+    # url_prefix was not sent with the task: the lister receives None
+    lister.assert_called_once_with(url_prefix=None, **task_kwargs)
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()
+
+
+@patch('swh.lister.cgit.tasks.CGitLister')
+def test_lister_with_url_prefix(lister, swh_app, celery_session_worker):
+    """The lister task hands the url prefix over to the lister."""
+    # the mocked class returns itself, so calls made on the lister
+    # instance are recorded on the same mock
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    task_kwargs = {
+        'url': 'https://cgit.kde.org/',
+        'url_prefix': 'https://anongit.kde.org/',
+        'instance': 'kde',
+    }
+    task = swh_app.send_task(
+        'swh.lister.cgit.tasks.CGitListerTask', kwargs=task_kwargs)
+    assert task
+    task.wait()
+    assert task.successful()
+
+    lister.assert_called_once_with(**task_kwargs)
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
- 'npm', 'phabricator', 'gnu', 'cran']
+ 'npm', 'phabricator', 'gnu', 'cran', 'cgit']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@@ -125,6 +125,14 @@
from .cran.lister import CRANLister
_lister = CRANLister(override_config=override_conf)
+ elif lister == 'cgit':
+ from .cgit.models import ModelBase
+ from .cgit.lister import CGitLister
+ _lister = CGitLister(
+ url='http://git.savannah.gnu.org/cgit/',
+ url_prefix='http://git.savannah.gnu.org/git/',
+ override_config=override_conf)
+
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -6,6 +6,7 @@
def celery_includes():
return [
'swh.lister.bitbucket.tasks',
+ 'swh.lister.cgit.tasks',
'swh.lister.cran.tasks',
'swh.lister.debian.tasks',
'swh.lister.github.tasks',

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 10:39 PM (2 d, 7 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3234068

Event Timeline