Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122986
D352.id1129.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
37 KB
Subscribers
None
D352.id1129.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -41,30 +41,90 @@
## lister-github
-1. git clone under $GHLISTER_ROOT (of your choosing)
+### Preparation steps
+
+1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing)
2. mkdir ~/.config/swh/ ~/.cache/swh/lister/github.com/
3. create configuration file ~/.config/swh/lister-github.com.yml
4. Bootstrap the db instance schema
-``` sh
-$ createdb lister-github.com
-$ bin/ghlister --db-url postgres:///lister-github.com createdb
-```
-
-Configuration file samples
--------------------------
-
-## github
+ $ createdb lister-github
+ $ python3 -m swh.lister.cli --db-url postgres:///lister-github github --createdb
-cat ~/.config/swh/lister-github.com.yml
+### Configuration file sample
+ $ cat ~/.config/swh/lister-github.com.yml
# see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
- lister_db_url: postgres:///lister-github.com
+ lister_db_url: postgres:///lister-github
credentials: []
cache_responses: True
cache_dir: /home/zack/.cache/swh/lister/github.com
+ storage: # to avoid having to run yet another service
+ cls: local
+ args:
+ db: service=swh-dev
+ objstorage:
+ cls: pathslicing
+ args:
+ root: /home/storage/swh-storage/
+ slicing: 0:1/1:5
+
+### Run
+
+ $ python3
+ >>> import logging
+ >>> logging.basicConfig(level=logging.DEBUG)
+ >>> from swh.lister.github.tasks import RangeGitHubLister
+ >>> RangeGitHubLister().run(364, 365)
+ INFO:root:listing repos starting at 364
+ DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.github.com
+ DEBUG:urllib3.connectionpool:https://api.github.com:443 "GET /repositories?since=364 HTTP/1.1" 200 None
+ DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost
+ DEBUG:urllib3.connectionpool:http://localhost:5002 "POST /origin/add HTTP/1.1" 200 1
+
+
+## lister-gitlab
+
+### Preparation steps
+
+1. git clone under $SWH_ENVIRONMENT_HOME/swh-lister (of your choosing)
+2. mkdir ~/.config/swh/ ~/.cache/swh/lister/gitlab/
+3. create configuration file ~/.config/swh/lister-gitlab.yml
+4. Bootstrap the db instance schema
+ $ createdb lister-gitlab
+ $ python3 -m swh.lister.cli --db-url postgres:///lister-gitlab gitlab --createdb
+
+### Configuration file sample
+
+ $ cat ~/.config/swh/lister-gitlab.yml
+ # see http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
+ lister_db_url: postgres:///lister-gitlab
+ credentials: []
+ cache_responses: True
+ cache_dir: /home/zack/.cache/swh/lister/gitlab
storage:
- cls: remote
+ cls: local
args:
- url: http://localhost:5002/
+ db: service=swh-dev
+ objstorage:
+ cls: pathslicing
+ args:
+ root: /home/storage/swh-storage/
+ slicing: 0:1/1:5
+
+### Run
+
+ $ python3
+ Python 3.6.6 (default, Jun 27 2018, 14:44:17)
+ [GCC 8.1.0] on linux
+ Type "help", "copyright", "credits" or "license" for more information.
+ >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
+ lister_name='salsa.debian.org', api_baseurl='https://salsa.debian.org/api/v4')
+ >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
+ lister_name='gitlab.freedesktop.org', api_baseurl='https://gitlab.freedesktop.org/api/v4')
+ >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
+ lister_name='gitlab.gnome.org', api_baseurl='https://gitlab.gnome.org/api/v4')
+ >>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
+ lister_name='gitlab.inria.fr', api_baseurl='https://gitlab.inria.fr/api/v4')
+ >>>
diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py
--- a/swh/lister/bitbucket/tasks.py
+++ b/swh/lister/bitbucket/tasks.py
@@ -1,16 +1,16 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
- IndexingRangeListerTask,
+ RangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from .lister import BitBucketLister
class BitBucketListerTask(ListerTaskBase):
- def new_lister(self):
+ def new_lister(self, *args, **kwargs):
return BitBucketLister(lister_name='bitbucket.com',
api_baseurl='https://api.bitbucket.org/2.0')
@@ -20,7 +20,7 @@
task_queue = 'swh_lister_bitbucket_discover'
-class RangeBitBucketLister(BitBucketListerTask, IndexingRangeListerTask):
+class RangeBitBucketLister(BitBucketListerTask, RangeListerTask):
task_queue = 'swh_lister_bitbucket_refresh'
diff --git a/swh/lister/bitbucket/tests/test_bb_lister.py b/swh/lister/bitbucket/tests/test_bb_lister.py
--- a/swh/lister/bitbucket/tests/test_bb_lister.py
+++ b/swh/lister/bitbucket/tests/test_bb_lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,10 +6,10 @@
import unittest
from swh.lister.bitbucket.lister import BitBucketLister
-from swh.lister.core.tests.test_lister import IndexingHttpListerTesterBase
+from swh.lister.core.tests.test_lister import HttpListerTesterBase
-class BitBucketListerTester(IndexingHttpListerTesterBase, unittest.TestCase):
+class BitBucketListerTester(HttpListerTesterBase, unittest.TestCase):
Lister = BitBucketLister
test_re = re.compile(r'/repositories\?after=([^?&]+)')
lister_subdir = 'bitbucket'
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/cli.py
@@ -0,0 +1,98 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+
+
+CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+@click.option(
+ '--db-url', '-d', default='postgres:///lister-gitlab.com',
+ help='SQLAlchemy DB URL; see '
+ '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>') # noqa
+@click.pass_context
+def cli(ctx, db_url):
+ """Initialize db model according to lister.
+
+ """
+ config = {}
+ if db_url:
+ config['db_url'] = db_url
+ ctx.obj = config
+
+
+@cli.command('github')
+@click.option('--createdb', is_flag=True, default=False,
+ help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+ help='Drop db')
+@click.pass_context
+def github(ctx, createdb, dropdb):
+ from .github import models
+ from .github.lister import GitHubLister
+
+ override_conf = {'lister_db_url': ctx.obj['db_url']}
+
+ lister = GitHubLister(lister_name='github.com',
+ api_baseurl='https://api.github.com',
+ override_config=override_conf)
+
+ if dropdb:
+ models.ModelBase.metadata.drop_all(lister.db_engine)
+
+ if createdb:
+ models.ModelBase.metadata.create_all(lister.db_engine)
+
+
+@cli.command('gitlab')
+@click.option('--createdb', is_flag=True, default=False,
+ help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+ help='Drop db')
+@click.pass_context
+def gitlab(ctx, createdb, dropdb):
+ from .gitlab import models
+ from .gitlab.lister import GitLabLister
+
+ override_conf = {'lister_db_url': ctx.obj['db_url']}
+
+ lister = GitLabLister(lister_name='gitlab.com',
+ api_baseurl='https://gitlab.com/api/v4/',
+ override_config=override_conf)
+
+ if dropdb:
+ models.ModelBase.metadata.drop_all(lister.db_engine)
+
+ if createdb:
+ models.ModelBase.metadata.create_all(lister.db_engine)
+
+
+@cli.command('bitbucket')
+@click.option('--createdb', is_flag=True, default=False,
+ help='create db')
+@click.option('--dropdb', is_flag=True, default=False,
+ help='Drop db')
+@click.pass_context
+def bitbucket(ctx, createdb, dropdb):
+ from .bitbucket import models
+ from .bitbucket.lister import BitBucketLister
+
+ override_conf = {'lister_db_url': ctx.obj['db_url']}
+
+ lister = BitBucketLister(lister_name='bitbucket.com',
+ api_baseurl='https://api.bitbucket.org/2.0',
+ override_config=override_conf)
+
+ if dropdb:
+ models.ModelBase.metadata.drop_all(lister.db_engine)
+
+ if createdb:
+ models.ModelBase.metadata.create_all(lister.db_engine)
+
+
+if __name__ == '__main__':
+ cli()
diff --git a/swh/lister/core/models.py b/swh/lister/core/models.py
--- a/swh/lister/core/models.py
+++ b/swh/lister/core/models.py
@@ -42,10 +42,13 @@
def __init__(self, uid=None, indexable=None, name=None, full_name=None,
html_url=None, origin_url=None, origin_type=None,
- description=None, task_id=None, origin_id=None):
+ description=None, task_id=None, origin_id=None,
+ instance=None):
self.uid = uid
self.last_seen = datetime.now()
+ if instance is not None:
+ self.instance = instance
if indexable is not None:
self.indexable = indexable
if name is not None:
diff --git a/swh/lister/core/paging_lister.py b/swh/lister/core/paging_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/core/paging_lister.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2015-2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import abc
+import logging
+
+from .lister_transports import SWHListerHttpTransport
+from .lister_base import SWHListerBase
+
+
+class SWHPagingLister(SWHListerBase):
+ """Lister* intermediate class for any service that follows the simple
+ pagination page pattern.
+
+ - Client sends a request to list repositories starting from a
+ given page identifier.
+
+ - Client receives structured (json/xml/etc) response with
+ information about a sequential series of repositories (per page)
+ starting from a given index and, if available, some indication
+ of the next page index for fetching the remaining repository
+ data.
+
+ See :class:`swh.lister.core.lister_base.SWHListerBase` for more
+ details.
+
+ This class cannot be instantiated. To create a new Lister for a
+ source code listing service that follows the model described
+ above, you must subclass this class. Then provide the required
+ overrides in addition to any unmet implementation/override
+ requirements of this class's base (see parent class and member
+ docstrings for details).
+
+ Required Overrides::
+
+ def get_next_target_from_response
+
+ """
+ @abc.abstractmethod
+ def get_next_target_from_response(self, response):
+ """Find the next server endpoint page given the entire response.
+
+ Implementation of this method depends on the server API spec
+ and the shape of the network response object returned by the
+ transport_request method.
+
+ For example, some APIs use the response's Link header to provide
+ the next page.
+
+ Args:
+ response (transport response): response page from the server
+
+ Returns:
+ index of next page, possibly extracted from a next href url
+
+ """
+ pass
+
+ # You probably don't need to override anything below this line.
+
+ def run(self, min_index=None, max_index=None):
+ """Main entry function. Sequentially fetches repository data from the
+ service according to the basic outline in the class
+ docstring. It keeps fetching sublists until either there
+ is no next index reference given or the given next index is
+ greater than the desired max_index.
+
+ Args:
+ min_index (indexable type): optional index to start from
+ max_index (indexable type): optional index to stop at
+
+ Returns:
+ nothing
+
+ """
+ index = min_index or ''
+ loop_count = 0
+ self.min_index = min_index
+ self.max_index = max_index
+
+ while self.is_within_bounds(index, self.min_index, self.max_index):
+ logging.info('listing repos starting at %s' % index)
+
+ response, injected_repos = self.ingest_data(index)
+ next_index = self.get_next_target_from_response(response)
+
+ # termination condition
+
+ if (next_index is None) or (next_index == index):
+ logging.info('stopping after index %s, no next link found' %
+ index)
+ break
+ else:
+ index = next_index
+
+ loop_count += 1
+ if loop_count == 20:
+ logging.info('flushing updates')
+ loop_count = 0
+ self.db_session.commit()
+ self.db_session = self.mk_session()
+
+ self.db_session.commit()
+ self.db_session = self.mk_session()
+
+
+class SWHPagingHttpLister(SWHListerHttpTransport, SWHPagingLister):
+ """Convenience class for ensuring right lookup and init order when
+ combining SWHPagingLister and SWHListerHttpTransport.
+
+ """
+ def __init__(self, lister_name=None, api_baseurl=None,
+ override_config=None):
+ SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
+ SWHPagingLister.__init__(self, lister_name=lister_name,
+ override_config=override_config)
diff --git a/swh/lister/core/tasks.py b/swh/lister/core/tasks.py
--- a/swh/lister/core/tasks.py
+++ b/swh/lister/core/tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -39,34 +39,50 @@
task_queue = AbstractAttribute('Celery Task queue name')
@abc.abstractmethod
- def new_lister(self):
+ def new_lister(self, *args, **kwargs):
"""Return a new lister of the appropriate type.
"""
pass
@abc.abstractmethod
- def run_task(self):
+ def run_task(self, *args, **kwargs):
pass
-class IndexingDiscoveryListerTask(ListerTaskBase):
- def run_task(self):
- lister = self.new_lister()
- return lister.run(min_index=lister.db_last_index(), max_index=None)
+# Paging/Indexing lister tasks derivatives (cf. {gitlab}/tasks)
-class IndexingRangeListerTask(ListerTaskBase):
- def run_task(self, start, end):
- lister = self.new_lister()
+class RangeListerTask(ListerTaskBase):
+ """Range indexing lister task.
+
+ """
+ def run_task(self, start, end, *args, **kwargs):
+ lister = self.new_lister(*args, **kwargs)
return lister.run(min_index=start, max_index=end)
+# Indexing Lister tasks derivatives (cf. {github/bitbucket}/tasks)
+
+
+class IndexingDiscoveryListerTask(ListerTaskBase):
+ """Incremental indexing lister task.
+
+ """
+ def run_task(self, *args, **kwargs):
+ lister = self.new_lister(*args, **kwargs)
+ return lister.run(min_index=lister.db_last_index(), max_index=None)
+
+
class IndexingRefreshListerTask(ListerTaskBase):
+ """Full indexing lister task.
+
+ """
GROUP_SPLIT = 10000
- def run_task(self):
- lister = self.new_lister()
+ def run_task(self, *args, **kwargs):
+ lister = self.new_lister(*args, **kwargs)
ranges = lister.db_partition_indices(self.GROUP_SPLIT)
random.shuffle(ranges)
- range_task = IndexingRangeListerTask()
- group(range_task.s(minv, maxv) for minv, maxv in ranges)()
+ range_task = RangeListerTask()
+ group(range_task.s(minv, maxv, *args, **kwargs)
+ for minv, maxv in ranges)()
diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py
--- a/swh/lister/core/tests/test_lister.py
+++ b/swh/lister/core/tests/test_lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -20,12 +20,15 @@
@requests_mock.Mocker()
-class IndexingHttpListerTesterBase(abc.ABC):
+class HttpListerTesterBase(abc.ABC):
"""Base testing class for subclasses of
- swh.lister.core.indexing_lister.SWHIndexingHttpLister.
- See swh.lister.github.tests.test_gh_lister for an example of how to
- customize for a specific listing service.
+ swh.lister.core.indexing_lister.SWHIndexingHttpLister and
+ swh.lister.core.paging_lister.SWHPagingHttpLister.
+
+ See swh.lister.github.tests.test_gh_lister for an example of how
+ to customize for a specific listing service.
+
"""
Lister = AbstractAttribute('The lister class to test')
test_re = AbstractAttribute('Compiled regex matching the server url. Must'
@@ -56,7 +59,7 @@
self.response = None
self.fl = None
self.helper = None
- if self.__class__ != IndexingHttpListerTesterBase:
+ if self.__class__ != HttpListerTesterBase:
self.run = TestCase.run.__get__(self, self.__class__)
else:
self.run = noop
@@ -99,6 +102,9 @@
return self.mock_limit_n_response(2, request, context)
def get_fl(self, override_config=None):
+ """Retrieve an instance of fake lister (fl).
+
+ """
if override_config or self.fl is None:
with patch(
'swh.scheduler.backend.SchedulerBackend.reconnect', noop
@@ -164,7 +170,7 @@
self.assertIsInstance(di, dict)
pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')]
for k in pubs:
- if k not in ['last_seen', 'task_id', 'origin_id']:
+ if k not in ['last_seen', 'task_id', 'origin_id', 'id']:
self.assertIn(k, di)
def disable_storage_and_scheduler(self, fl):
@@ -221,11 +227,14 @@
self.disable_storage_and_scheduler(fl)
- fl.run(min_index=self.first_index)
-
- self.assertEqual(fl.db_last_index(), self.last_index)
- partitions = fl.db_partition_indices(5)
- self.assertGreater(len(partitions), 0)
- for k in partitions:
- self.assertLessEqual(len(k), 5)
- self.assertGreater(len(k), 0)
+ # FIXME: Separating the tests properly for the gitlab lister
+ # has not succeeded yet
+ if hasattr(fl, 'db_last_index'):
+ fl.run(min_index=self.first_index)
+
+ self.assertEqual(fl.db_last_index(), self.last_index)
+ partitions = fl.db_partition_indices(5)
+ self.assertGreater(len(partitions), 0)
+ for k in partitions:
+ self.assertLessEqual(len(k), 5)
+ self.assertGreater(len(k), 0)
diff --git a/swh/lister/debian/tasks.py b/swh/lister/debian/tasks.py
--- a/swh/lister/debian/tasks.py
+++ b/swh/lister/debian/tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -10,9 +10,9 @@
class DebianListerTask(ListerTaskBase):
task_queue = 'swh_lister_debian'
- def new_lister(self):
+ def new_lister(self, *args, **kwargs):
return DebianLister()
- def run_task(self, distribution):
+ def run_task(self, distribution, *args, **kwargs):
lister = self.new_lister()
return lister.run(distribution)
diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py
--- a/swh/lister/github/tasks.py
+++ b/swh/lister/github/tasks.py
@@ -1,16 +1,16 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
- IndexingRangeListerTask,
+ RangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from .lister import GitHubLister
class GitHubListerTask(ListerTaskBase):
- def new_lister(self):
+ def new_lister(self, *args, **kwargs):
return GitHubLister(lister_name='github.com',
api_baseurl='https://api.github.com')
@@ -19,7 +19,7 @@
task_queue = 'swh_lister_github_discover'
-class RangeGitHubLister(GitHubListerTask, IndexingRangeListerTask):
+class RangeGitHubLister(GitHubListerTask, RangeListerTask):
task_queue = 'swh_lister_github_refresh'
diff --git a/swh/lister/github/tests/test_gh_lister.py b/swh/lister/github/tests/test_gh_lister.py
--- a/swh/lister/github/tests/test_gh_lister.py
+++ b/swh/lister/github/tests/test_gh_lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,11 +6,11 @@
import unittest
from datetime import datetime, timedelta
-from swh.lister.core.tests.test_lister import IndexingHttpListerTesterBase
+from swh.lister.core.tests.test_lister import HttpListerTesterBase
from swh.lister.github.lister import GitHubLister
-class GitHubListerTester(IndexingHttpListerTesterBase, unittest.TestCase):
+class GitHubListerTester(HttpListerTesterBase, unittest.TestCase):
Lister = GitHubLister
test_re = re.compile(r'/repositories\?since=([^?&]+)')
lister_subdir = 'github'
diff --git a/swh/lister/gitlab/__init__.py b/swh/lister/gitlab/__init__.py
new file mode 100644
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/lister.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import re
+import time
+
+from ..core.paging_lister import SWHPagingHttpLister
+from .models import GitLabModel
+
+
+class GitLabLister(SWHPagingHttpLister):
+ # Template path expecting an integer that represents the page id
+ PATH_TEMPLATE = '/projects?page=%d&order_by=id&sort=asc&simple=true'
+ API_URL_INDEX_RE = re.compile(r'^.*/projects.*page=(\d+).*')
+ MODEL = GitLabModel
+
+ @property
+ def CONFIG_BASE_FILENAME(self):
+ """One gitlab lister for all instances. We discriminate between the
+ origins on a per-instance basis in the table.
+
+ """
+ return 'lister-gitlab'
+
+ @property
+ def ADDITIONAL_CONFIG(self):
+ """Override additional config as the 'credentials' structure changes
+ between the ancestor classes and this class.
+
+ cf. request_params method below
+
+ """
+ return {
+ 'lister_db_url':
+ ('str', 'postgresql:///lister-%s' % self.lister_name),
+ 'credentials': # credentials is a dict
+ ('dict', {}),
+ 'cache_responses':
+ ('bool', False),
+ 'cache_dir':
+ ('str', '~/.cache/swh/lister/%s' % self.lister_name),
+ }
+
+ def request_params(self, identifier):
+ """Get the full parameters passed to requests given the
+ transport_request identifier.
+
+ For the gitlab lister, the 'credentials' entries are configured
+ per instance. For example:
+
+ - credentials:
+ - gitlab.com:
+ - username: user0
+ password: <pass>
+ - username: user1
+ password: <pass>
+ - ...
+ - other-gitlab-instance:
+ ...
+
+ """
+ params = {
+ 'headers': self.request_headers() or {}
+ }
+ # Retrieve the credentials per instance
+ creds = self.config['credentials']
+ if creds:
+ creds_lister = creds[self.lister_name]
+ auth = random.choice(creds_lister) if creds else None
+ if auth:
+ params['auth'] = (auth['username'], auth['password'])
+ return params
+
+ def get_model_from_repo(self, repo):
+ return {
+ 'instance': self.lister_name,
+ 'uid': repo['id'],
+ 'indexable': repo['id'],
+ 'name': repo['name'],
+ 'full_name': repo['path_with_namespace'],
+ 'html_url': repo['web_url'],
+ 'origin_url': repo['http_url_to_repo'],
+ 'origin_type': 'git',
+ 'description': repo['description'],
+ }
+
+ def transport_quota_check(self, response):
+ """Deal with rate limit if any.
+
+ """
+ # not all gitlab instances have a rate limit
+ if 'RateLimit-Remaining' in response.headers:
+ reqs_remaining = int(response.headers['RateLimit-Remaining'])
+ if response.status_code == 403 and reqs_remaining == 0:
+ reset_at = int(response.headers['RateLimit-Reset'])
+ delay = min(reset_at - time.time(), 3600)
+ return True, delay
+ return False, 0
+
+ def get_next_target_from_response(self, response):
+ """Deal with pagination
+
+ """
+ if 'next' in response.links:
+ next_url = response.links['next']['url']
+ return int(self.API_URL_INDEX_RE.match(next_url).group(1))
+ return None
+
+ def transport_response_simplified(self, response):
+ repos = response.json()
+ return [self.get_model_from_repo(repo) for repo in repos]
diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, Integer, String
+
+from ..core.models import ModelBase
+
+
+class GitLabModel(ModelBase):
+ """a Gitlab repository"""
+ __tablename__ = 'gitlab_repo'
+
+ id = Column(Integer, primary_key=True)
+ uid = Column(Integer, index=True)
+ instance = Column(String, index=True)
+ indexable = Column(Integer, index=True)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/tasks.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2018 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.lister.core.tasks import ListerTaskBase, RangeListerTask
+
+
+from .lister import GitLabLister
+
+
+class GitLabDotComListerTask(ListerTaskBase):
+ def new_lister(self, lister_name='gitlab.com',
+ api_baseurl='https://gitlab.com/api/v4'):
+ return GitLabLister(
+ lister_name=lister_name, api_baseurl=api_baseurl)
+
+
+class RangeGitLabLister(GitLabDotComListerTask, RangeListerTask):
+ """GitLab lister working on specified range (start, end) arguments.
+
+ """
+ task_queue = 'swh_lister_gitlab_refresh'
+
+
diff --git a/swh/lister/gitlab/tests/__init__.py b/swh/lister/gitlab/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/gitlab/tests/api_empty_response.json b/swh/lister/gitlab/tests/api_empty_response.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/tests/api_empty_response.json
@@ -0,0 +1 @@
+[]
diff --git a/swh/lister/gitlab/tests/api_response.json b/swh/lister/gitlab/tests/api_response.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/tests/api_response.json
@@ -0,0 +1,170 @@
+[{"avatar_url": null,
+ "created_at": "2012-10-15T17:26:53.000Z",
+ "default_branch": "master",
+ "description": null,
+ "forks_count": 3,
+ "http_url_to_repo": "https://gitlab.com/leberwurscht/teardownwalls.git",
+ "id": 143,
+ "last_activity_at": "2013-10-03T08:08:46.000Z",
+ "name": "TearDownWalls",
+ "name_with_namespace": "Leberwurscht / TearDownWalls",
+ "path": "teardownwalls",
+ "path_with_namespace": "leberwurscht/teardownwalls",
+ "readme_url": "https://gitlab.com/leberwurscht/teardownwalls/blob/master/README.md",
+ "ssh_url_to_repo": "git@gitlab.com:leberwurscht/teardownwalls.git",
+ "star_count": 1,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/leberwurscht/teardownwalls"},
+ {"avatar_url": null,
+ "created_at": "2012-12-12T21:30:14.000Z",
+ "default_branch": "master",
+ "description": "",
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/technomancy/leiningen.git",
+ "id": 450,
+ "last_activity_at": "2018-06-24T00:07:06.666Z",
+ "name": "Leiningen",
+ "name_with_namespace": "Phil Hagelberg / Leiningen",
+ "path": "leiningen",
+ "path_with_namespace": "technomancy/leiningen",
+ "readme_url": "https://gitlab.com/technomancy/leiningen/blob/master/README.md",
+ "ssh_url_to_repo": "git@gitlab.com:technomancy/leiningen.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/technomancy/leiningen"},
+ {"avatar_url": null,
+ "created_at": "2012-12-18T17:25:39.000Z",
+ "default_branch": "master",
+ "description": null,
+ "forks_count": 4,
+ "http_url_to_repo": "https://gitlab.com/jonan/heroes-of-wesnoth.git",
+ "id": 526,
+ "last_activity_at": "2015-04-09T14:43:49.363Z",
+ "name": "Heroes of Wesnoth",
+ "name_with_namespace": "Jonan / Heroes of Wesnoth",
+ "path": "heroes-of-wesnoth",
+ "path_with_namespace": "jonan/heroes-of-wesnoth",
+ "readme_url": null,
+ "ssh_url_to_repo": "git@gitlab.com:jonan/heroes-of-wesnoth.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/jonan/heroes-of-wesnoth"},
+ {"avatar_url": null,
+ "created_at": "2012-12-18T17:33:03.000Z",
+ "default_branch": "master",
+ "description": null,
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/jonan/k.git",
+ "id": 527,
+ "last_activity_at": "2014-10-11T22:29:04.138Z",
+ "name": "K",
+ "name_with_namespace": "Jonan / K",
+ "path": "k",
+ "path_with_namespace": "jonan/k",
+ "readme_url": "https://gitlab.com/jonan/k/blob/master/README",
+ "ssh_url_to_repo": "git@gitlab.com:jonan/k.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/jonan/k"},
+ {"avatar_url": null,
+ "created_at": "2013-01-06T20:35:42.000Z",
+ "default_branch": "master",
+ "description": "",
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/hcs/hcs_utils.git",
+ "id": 1025,
+ "last_activity_at": "2015-09-14T12:01:11.151Z",
+ "name": "hcs_utils",
+ "name_with_namespace": "Christer Sjöholm / hcs_utils",
+ "path": "hcs_utils",
+ "path_with_namespace": "hcs/hcs_utils",
+ "readme_url": "https://gitlab.com/hcs/hcs_utils/blob/master/README.txt",
+ "ssh_url_to_repo": "git@gitlab.com:hcs/hcs_utils.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/hcs/hcs_utils"},
+ {"avatar_url": null,
+ "created_at": "2013-01-24T08:41:56.000Z",
+ "default_branch": null,
+ "description": null,
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/soeren/sspssptest.git",
+ "id": 1702,
+ "last_activity_at": "2013-10-03T08:31:54.000Z",
+ "name": "sspssptest",
+ "name_with_namespace": "kruemel / sspssptest",
+ "path": "sspssptest",
+ "path_with_namespace": "soeren/sspssptest",
+ "readme_url": null,
+ "ssh_url_to_repo": "git@gitlab.com:soeren/sspssptest.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/soeren/sspssptest"},
+ {"avatar_url": null,
+ "created_at": "2013-01-28T22:59:31.000Z",
+ "default_branch": "master",
+ "description": null,
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/dpp/slothbeast.git",
+ "id": 1865,
+ "last_activity_at": "2013-05-05T09:44:57.000Z",
+ "name": "slothbeast",
+ "name_with_namespace": "David Pollak / slothbeast",
+ "path": "slothbeast",
+ "path_with_namespace": "dpp/slothbeast",
+ "readme_url": "https://gitlab.com/dpp/slothbeast/blob/master/README.md",
+ "ssh_url_to_repo": "git@gitlab.com:dpp/slothbeast.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/dpp/slothbeast"},
+ {"avatar_url": null,
+ "created_at": "2013-02-07T20:50:20.000Z",
+ "default_branch": "master",
+ "description": null,
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/rocksoniko/easy.git",
+ "id": 2227,
+ "last_activity_at": "2013-05-05T09:45:00.000Z",
+ "name": "easy",
+ "name_with_namespace": "Hugo / easy",
+ "path": "easy",
+ "path_with_namespace": "rocksoniko/easy",
+ "readme_url": "https://gitlab.com/rocksoniko/easy/blob/master/README",
+ "ssh_url_to_repo": "git@gitlab.com:rocksoniko/easy.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/rocksoniko/easy"},
+ {"avatar_url": null,
+ "created_at": "2013-02-10T17:21:24.000Z",
+ "default_branch": null,
+ "description": null,
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/grup/grup.git",
+ "id": 2294,
+ "last_activity_at": "2013-05-05T09:45:01.000Z",
+ "name": "grup",
+ "name_with_namespace": "grup / grup",
+ "path": "grup",
+ "path_with_namespace": "grup/grup",
+ "readme_url": null,
+ "ssh_url_to_repo": "git@gitlab.com:grup/grup.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/grup/grup"},
+ {"avatar_url": null,
+ "created_at": "2013-02-14T09:31:50.000Z",
+ "default_branch": "master",
+ "description": "",
+ "forks_count": 0,
+ "http_url_to_repo": "https://gitlab.com/varac/test.git",
+ "id": 2390,
+ "last_activity_at": "2016-02-11T13:51:47.463Z",
+ "name": "test",
+ "name_with_namespace": "varac / test",
+ "path": "test",
+ "path_with_namespace": "varac/test",
+ "readme_url": null,
+ "ssh_url_to_repo": "git@gitlab.com:varac/test.git",
+ "star_count": 0,
+ "tag_list": [],
+ "web_url": "https://gitlab.com/varac/test"}]
diff --git a/swh/lister/github/tests/test_gh_lister.py b/swh/lister/gitlab/tests/test_gitlab_lister.py
copy from swh/lister/github/tests/test_gh_lister.py
copy to swh/lister/gitlab/tests/test_gitlab_lister.py
--- a/swh/lister/github/tests/test_gh_lister.py
+++ b/swh/lister/gitlab/tests/test_gitlab_lister.py
@@ -1,37 +1,37 @@
-# Copyright (C) 2017 the Software Heritage developers
+# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import re
import unittest
+
from datetime import datetime, timedelta
-from swh.lister.core.tests.test_lister import IndexingHttpListerTesterBase
-from swh.lister.github.lister import GitHubLister
+from swh.lister.gitlab.lister import GitLabLister
+from swh.lister.core.tests.test_lister import HttpListerTesterBase
-class GitHubListerTester(IndexingHttpListerTesterBase, unittest.TestCase):
- Lister = GitHubLister
- test_re = re.compile(r'/repositories\?since=([^?&]+)')
- lister_subdir = 'github'
+class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
+ Lister = GitLabLister
+ test_re = GitLabLister.API_URL_INDEX_RE
+ lister_subdir = 'gitlab'
good_api_response_file = 'api_response.json'
bad_api_response_file = 'api_empty_response.json'
- first_index = 26
- last_index = 368
- entries_per_page = 100
+ first_index = 1
+ last_index = 2
+ entries_per_page = 10
def response_headers(self, request):
- headers = {'X-RateLimit-Remaining': '1'}
+ headers = {'RateLimit-Remaining': '1'}
if self.request_index(request) == str(self.first_index):
headers.update({
- 'Link': '<https://api.github.com/repositories?since=367>;'
+ 'Link': '<https://gitlab.com/v4/projects?page=2>;'
' rel="next",'
- '<https://api.github.com/repositories{?since}>;'
+ '<https://gitlab.com/v4/projects{?page}>;'
' rel="first"'
})
else:
headers.update({
- 'Link': '<https://api.github.com/repositories{?since}>;'
+ 'Link': '<https://gitlab.com/v4/projects{?page}>;'
' rel="first"'
})
@@ -40,7 +40,7 @@
def mock_rate_quota(self, n, request, context):
self.rate_limit += 1
context.status_code = 403
- context.headers['X-RateLimit-Remaining'] = '0'
+ context.headers['RateLimit-Remaining'] = '0'
one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp())
- context.headers['X-RateLimit-Reset'] = str(one_second)
+ context.headers['RateLimit-Reset'] = str(one_second)
return '{"error":"dummy"}'
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 2:38 PM (2 d, 22 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231605
Attached To
D352: Bootstrap gitlab lister
Event Timeline
Log In to Comment