diff --git a/swh/lister/bitbucket/models.py b/swh/lister/bitbucket/models.py index 07b78f2..65fba9c 100644 --- a/swh/lister/bitbucket/models.py +++ b/swh/lister/bitbucket/models.py @@ -1,15 +1,15 @@ -# Copyright (C) 2017 the Software Heritage developers +# Copyright (C) 2017-2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from sqlalchemy import Column, String -from swh.lister.core.models import ModelBase +from swh.lister.core.models import IndexingModelBase -class BitBucketModel(ModelBase): +class BitBucketModel(IndexingModelBase): """a BitBucket repository""" __tablename__ = 'bitbucket_repos' uid = Column(String, primary_key=True) indexable = Column(String, index=True) diff --git a/swh/lister/core/models.py b/swh/lister/core/models.py index 46dd684..589918d 100644 --- a/swh/lister/core/models.py +++ b/swh/lister/core/models.py @@ -1,67 +1,82 @@ # Copyright (C) 2015-2017 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import abc from datetime import datetime from sqlalchemy import Column, DateTime, Integer, String from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta from .abstractattribute import AbstractAttribute SQLBase = declarative_base() class ABCSQLMeta(abc.ABCMeta, DeclarativeMeta): pass class ModelBase(SQLBase, metaclass=ABCSQLMeta): """a common repository""" __abstract__ = True __tablename__ = AbstractAttribute uid = AbstractAttribute('Column(, primary_key=True)') - # The value used for sorting, segmenting, or api query paging, - # because uids aren't always sequential. - indexable = AbstractAttribute('Column(, index=True)') - name = Column(String, index=True) full_name = Column(String, index=True) html_url = Column(String) origin_url = Column(String) origin_type = Column(String) description = Column(String) last_seen = Column(DateTime, nullable=False) task_id = Column(Integer) origin_id = Column(Integer) - def __init__(self, uid=None, indexable=None, name=None, full_name=None, + def __init__(self, uid=None, name=None, full_name=None, html_url=None, origin_url=None, origin_type=None, description=None, task_id=None, origin_id=None): self.uid = uid self.last_seen = datetime.now() - if indexable is not None: - self.indexable = indexable if name is not None: self.name = name if full_name is not None: self.full_name = full_name if html_url is not None: self.html_url = html_url if origin_url is not None: self.origin_url = origin_url if origin_type is not None: self.origin_type = origin_type if description is not None: self.description = description if task_id is not None: self.task_id = task_id if origin_id is not None: self.origin_id = origin_id + + +class IndexingModelBase(ModelBase, metaclass=ABCSQLMeta): + __abstract__ = True + __tablename__ = AbstractAttribute + + # The value used for sorting, segmenting, or api query paging, + # because uids aren't always sequential. + indexable = AbstractAttribute('Column(, index=True)') + + def __init__(self, uid=None, name=None, full_name=None, + html_url=None, origin_url=None, origin_type=None, + description=None, task_id=None, origin_id=None, + indexable=None): + super().__init__( + uid=uid, name=name, full_name=full_name, html_url=html_url, + origin_url=origin_url, origin_type=origin_type, + description=description, task_id=task_id, origin_id=origin_id) + + if indexable is not None: + self.indexable = indexable diff --git a/swh/lister/core/tests/test_model.py b/swh/lister/core/tests/test_model.py index 6eb9a4b..b2c25e1 100644 --- a/swh/lister/core/tests/test_model.py +++ b/swh/lister/core/tests/test_model.py @@ -1,53 +1,94 @@ # Copyright (C) 2017 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from nose.tools import istest from sqlalchemy import Column, Integer -from swh.lister.core.models import ModelBase +from swh.lister.core.models import ModelBase, IndexingModelBase class BadSubclass1(ModelBase): __abstract__ = True pass class BadSubclass2(ModelBase): __abstract__ = True __tablename__ = 'foo' class BadSubclass3(BadSubclass2): __abstract__ = True pass class GoodSubclass(BadSubclass2): uid = Column(Integer, primary_key=True) indexable = Column(Integer, index=True) +class IndexingBadSubclass(IndexingModelBase): + __abstract__ = True + pass + + +class IndexingBadSubclass2(IndexingModelBase): + __abstract__ = True + __tablename__ = 'foo' + + +class IndexingBadSubclass3(IndexingBadSubclass2): + __abstract__ = True + pass + + +class IndexingGoodSubclass(IndexingModelBase): + uid = Column(Integer, primary_key=True) + indexable = Column(Integer, index=True) + __tablename__ = 'bar' + + class TestModel(unittest.TestCase): @istest def test_model_instancing(self): with self.assertRaises(TypeError): ModelBase() with self.assertRaises(TypeError): BadSubclass1() with self.assertRaises(TypeError): BadSubclass2() with self.assertRaises(TypeError): BadSubclass3() self.assertIsInstance(GoodSubclass(), GoodSubclass) - gsc = GoodSubclass(uid='uid', indexable='indexable') + gsc = GoodSubclass(uid='uid') self.assertEqual(gsc.__tablename__, 'foo') self.assertEqual(gsc.uid, 'uid') + + @istest + def test_indexing_model_instancing(self): + with self.assertRaises(TypeError): + IndexingModelBase() + + with self.assertRaises(TypeError): + IndexingBadSubclass() + + with self.assertRaises(TypeError): + IndexingBadSubclass2() + + with self.assertRaises(TypeError): + IndexingBadSubclass3() + + self.assertIsInstance(IndexingGoodSubclass(), IndexingGoodSubclass) + gsc = IndexingGoodSubclass(uid='uid', indexable='indexable') + + self.assertEqual(gsc.__tablename__, 'bar') + self.assertEqual(gsc.uid, 'uid') self.assertEqual(gsc.indexable, 'indexable') diff --git a/swh/lister/github/models.py b/swh/lister/github/models.py index 32055a7..2cb429f 100644 --- a/swh/lister/github/models.py +++ b/swh/lister/github/models.py @@ -1,20 +1,20 @@ -# Copyright (C) 2017 the Software Heritage developers +# Copyright (C) 2017-2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from sqlalchemy import Column, Boolean, Integer -from swh.lister.core.models import ModelBase +from swh.lister.core.models import IndexingModelBase -class GitHubModel(ModelBase): +class GitHubModel(IndexingModelBase): """a GitHub repository""" __tablename__ = 'github_repos' uid = Column(Integer, primary_key=True) indexable = Column(Integer, index=True) fork = Column(Boolean) def __init__(self, *args, **kwargs): self.fork = kwargs.pop('fork', False) super().__init__(*args, **kwargs) diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index b588083..85c0a52 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -1,122 +1,121 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random import time from .. import utils from ..core.paging_lister import PageByPageHttpLister from .models import GitLabModel class GitLabLister(PageByPageHttpLister): # Template path expecting an integer that represents the page id PATH_TEMPLATE = '/projects?page=%d&order_by=id' MODEL = GitLabModel LISTER_NAME = 'gitlab' def __init__(self, api_baseurl=None, instance=None, override_config=None, sort='asc'): super().__init__(api_baseurl=api_baseurl, override_config=override_config) self.instance = instance self.PATH_TEMPLATE = '%s&sort=%s' % (self.PATH_TEMPLATE, sort) @property def ADDITIONAL_CONFIG(self): """Override additional config as the 'credentials' structure change between the ancestor classes and this class. cf. request_params method below """ default_config = super().ADDITIONAL_CONFIG # 'credentials' is a dict of (instance, {username, password}) dict default_config['credentials'] = ('dict', {}) return default_config def request_params(self, identifier): """Get the full parameters passed to requests given the transport_request identifier. For the gitlab lister, the 'credentials' entries is configured per instance. For example: - credentials: - gitlab.com: - username: user0 password: - username: user1 password: - ... - other-gitlab-instance: ... """ params = { 'headers': self.request_headers() or {} } # Retrieve the credentials per instance creds = self.config['credentials'] if creds: creds_lister = creds[self.instance] auth = random.choice(creds_lister) if creds else None if auth: params['auth'] = (auth['username'], auth['password']) return params def get_model_from_repo(self, repo): return { 'instance': self.instance, 'uid': repo['id'], - 'indexable': repo['id'], 'name': repo['name'], 'full_name': repo['path_with_namespace'], 'html_url': repo['web_url'], 'origin_url': repo['http_url_to_repo'], 'origin_type': 'git', 'description': repo['description'], } def transport_quota_check(self, response): """Deal with rate limit if any. """ # not all gitlab instance have rate limit if 'RateLimit-Remaining' in response.headers: reqs_remaining = int(response.headers['RateLimit-Remaining']) if response.status_code == 403 and reqs_remaining == 0: reset_at = int(response.headers['RateLimit-Reset']) delay = min(reset_at - time.time(), 3600) return True, delay return False, 0 def get_next_target_from_response(self, response): """Determine the next page identifier. """ _next = utils.get(response.headers, ['X-Next-Page', 'x-next-page']) if _next: return int(_next) def get_pages_information(self): """Determine pages information. """ response = self.transport_head(identifier=1) h = response.headers total = utils.get(h, ['X-Total', 'x-total']) total_pages = utils.get(h, ['X-Total-Pages', 'x-total-pages']) per_page = utils.get(h, ['X-Per-Page', 'x-per-page']) if total is not None: total = int(total) if total_pages is not None: total_pages = int(total_pages) if per_page is not None: per_page = int(per_page) return total, total_pages, per_page def transport_response_simplified(self, response): repos = response.json() return [self.get_model_from_repo(repo) for repo in repos] diff --git a/swh/lister/gitlab/models.py b/swh/lister/gitlab/models.py index 6628e63..8b1d950 100644 --- a/swh/lister/gitlab/models.py +++ b/swh/lister/gitlab/models.py @@ -1,30 +1,29 @@ # Copyright (C) 2018 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from sqlalchemy import Column, Integer, String from ..core.models import ModelBase class GitLabModel(ModelBase): """a Gitlab repository from a gitlab instance """ __tablename__ = 'gitlab_repo' id = Column(Integer, primary_key=True) uid = Column(Integer, index=True) instance = Column(String, index=True) - indexable = Column(Integer, index=True) def __init__(self, uid=None, indexable=None, name=None, full_name=None, html_url=None, origin_url=None, origin_type=None, description=None, task_id=None, origin_id=None, instance=None): - super().__init__(uid=uid, indexable=indexable, name=name, + super().__init__(uid=uid, name=name, full_name=full_name, html_url=html_url, origin_url=origin_url, origin_type=origin_type, description=description, task_id=task_id, origin_id=origin_id) self.instance = instance