Page Menu | Home | Software Heritage

D1950.id6574.diff
No One | Temporary

D1950.id6574.diff

diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -23,9 +23,8 @@
instance = 'bitbucket'
default_min_bound = datetime.utcfromtimestamp(0)
- def __init__(self, api_baseurl=None, override_config=None, per_page=100):
- super().__init__(
- api_baseurl=api_baseurl, override_config=override_config)
+ def __init__(self, url=None, override_config=None, per_page=100):
+ super().__init__(url=url, override_config=override_config)
per_page = self.config.get('per_page', per_page)
self.PATH_TEMPLATE = '%s&pagelen=%s' % (
diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py
--- a/swh/lister/bitbucket/tasks.py
+++ b/swh/lister/bitbucket/tasks.py
@@ -12,20 +12,16 @@
GROUP_SPLIT = 10000
-def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100):
- return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page)
-
-
@app.task(name=__name__ + '.IncrementalBitBucketLister')
def list_bitbucket_incremental(**lister_args):
'''Incremental update of the BitBucket forge'''
- lister = new_lister(**lister_args)
+ lister = BitBucketLister(**lister_args)
lister.run(min_bound=lister.db_last_index(), max_bound=None)
@app.task(name=__name__ + '.RangeBitBucketLister')
def _range_bitbucket_lister(start, end, **lister_args):
- lister = new_lister(**lister_args)
+ lister = BitBucketLister(**lister_args)
lister.run(min_bound=start, max_bound=end)
@@ -36,7 +32,7 @@
It's not to be called for an initial listing.
"""
- lister = new_lister(**lister_args)
+ lister = BitBucketLister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
if not ranges:
self.log.info('Nothing to list')
diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py
--- a/swh/lister/bitbucket/tests/test_tasks.py
+++ b/swh/lister/bitbucket/tests/test_tasks.py
@@ -26,8 +26,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
+ lister.assert_called_once_with()
lister.db_last_index.assert_called_once_with()
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
@@ -45,8 +44,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
+ lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@@ -76,8 +74,7 @@
break
sleep(1)
- lister.assert_called_with(
- api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
+ lister.assert_called_with()
# one by the FullBitbucketRelister task
# + 5 for the RangeBitbucketLister subtasks
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -63,7 +63,7 @@
(lister_name, SUPPORTED_LISTERS))
if db_url:
conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
- # To allow api_baseurl override per lister
+
registry_entry = LISTERS[lister_name].load()()
lister_cls = registry_entry['lister']
lister = lister_cls(override_config=conf)
diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py
--- a/swh/lister/core/indexing_lister.py
+++ b/swh/lister/core/indexing_lister.py
@@ -244,6 +244,6 @@
class IndexingHttpLister(ListerHttpTransport, IndexingLister):
"""Convenience class for ensuring right lookup and init order
when combining IndexingLister and ListerHttpTransport."""
- def __init__(self, api_baseurl=None, override_config=None):
+ def __init__(self, url=None, override_config=None):
IndexingLister.__init__(self, override_config=override_config)
- ListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
+ ListerHttpTransport.__init__(self, url=url)
diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py
--- a/swh/lister/core/lister_transports.py
+++ b/swh/lister/core/lister_transports.py
@@ -96,7 +96,7 @@
required.
"""
path = self.PATH_TEMPLATE % identifier
- return self.api_baseurl + path
+ return self.url + path
def request_params(self, identifier):
"""Get the full parameters passed to requests given the
@@ -142,14 +142,14 @@
self.reset_backoff()
return False, 0
- def __init__(self, api_baseurl=None):
- if not api_baseurl:
- api_baseurl = self.config.get('api_baseurl')
- if not api_baseurl:
- api_baseurl = self.DEFAULT_URL
- if not api_baseurl:
- raise NameError('HTTP Lister Transport requires api_baseurl.')
- self.api_baseurl = api_baseurl # eg. 'https://api.github.com'
+ def __init__(self, url=None):
+ if not url:
+ url = self.config.get('url')
+ if not url:
+ url = self.DEFAULT_URL
+ if not url:
+ raise NameError('HTTP Lister Transport requires an url.')
+ self.url = url # eg. 'https://api.github.com'
self.session = requests.Session()
self.lister_version = __version__
@@ -218,7 +218,7 @@
"parse for information")
PATH_TEMPLATE = None # we do not use it
- def __init__(self, api_baseurl=None):
+ def __init__(self, url=None):
self.session = requests.Session()
self.lister_version = __version__
diff --git a/swh/lister/core/page_by_page_lister.py b/swh/lister/core/page_by_page_lister.py
--- a/swh/lister/core/page_by_page_lister.py
+++ b/swh/lister/core/page_by_page_lister.py
@@ -155,6 +155,6 @@
combining PageByPageLister and ListerHttpTransport.
"""
- def __init__(self, api_baseurl=None, override_config=None):
+ def __init__(self, url=None, override_config=None):
PageByPageLister.__init__(self, override_config=override_config)
- ListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
+ ListerHttpTransport.__init__(self, url=url)
diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py
--- a/swh/lister/core/tests/test_lister.py
+++ b/swh/lister/core/tests/test_lister.py
@@ -77,7 +77,7 @@
"""
if override_config or self.fl is None:
- self.fl = self.Lister(api_baseurl='https://fakeurl',
+ self.fl = self.Lister(url='https://fakeurl',
override_config=override_config)
self.fl.INITIAL_BACKOFF = 1
diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -35,7 +35,7 @@
instance = 'debian'
def __init__(self, override_config=None):
- ListerHttpTransport.__init__(self, api_baseurl="bogus")
+ ListerHttpTransport.__init__(self, url="notused")
ListerBase.__init__(self, override_config=override_config)
def transport_request(self, identifier):
diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py
--- a/swh/lister/github/tasks.py
+++ b/swh/lister/github/tasks.py
@@ -12,20 +12,16 @@
GROUP_SPLIT = 10000
-def new_lister(api_baseurl='https://api.github.com', **kw):
- return GitHubLister(api_baseurl=api_baseurl, **kw)
-
-
@app.task(name=__name__ + '.IncrementalGitHubLister')
def list_github_incremental(**lister_args):
'Incremental update of GitHub'
- lister = new_lister(**lister_args)
+ lister = GitHubLister(**lister_args)
lister.run(min_bound=lister.db_last_index(), max_bound=None)
@app.task(name=__name__ + '.RangeGitHubLister')
def _range_github_lister(start, end, **lister_args):
- lister = new_lister(**lister_args)
+ lister = GitHubLister(**lister_args)
lister.run(min_bound=start, max_bound=end)
@@ -36,7 +32,7 @@
It's not to be called for an initial listing.
"""
- lister = new_lister(**lister_args)
+ lister = GitHubLister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
if not ranges:
self.log.info('Nothing to list')
diff --git a/swh/lister/github/tests/test_tasks.py b/swh/lister/github/tests/test_tasks.py
--- a/swh/lister/github/tests/test_tasks.py
+++ b/swh/lister/github/tests/test_tasks.py
@@ -26,7 +26,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(api_baseurl='https://api.github.com')
+ lister.assert_called_once_with()
lister.db_last_index.assert_called_once_with()
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
@@ -44,7 +44,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(api_baseurl='https://api.github.com')
+ lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@@ -74,7 +74,7 @@
break
sleep(1)
- lister.assert_called_with(api_baseurl='https://api.github.com')
+ lister.assert_called_with()
# one by the FullGitHubRelister task
# + 5 for the RangeGitHubLister subtasks
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -16,12 +16,11 @@
MODEL = GitLabModel
LISTER_NAME = 'gitlab'
- def __init__(self, api_baseurl=None, instance=None,
+ def __init__(self, url=None, instance=None,
override_config=None, sort='asc', per_page=20):
- super().__init__(api_baseurl=api_baseurl,
- override_config=override_config)
+ super().__init__(url=url, override_config=override_config)
if instance is None:
- instance = parse_url(self.api_baseurl).host
+ instance = parse_url(self.url).host
self.instance = instance
self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
self.PATH_TEMPLATE, sort, per_page)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -14,18 +14,11 @@
NBPAGES = 10
-def new_lister(api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='asc', per_page=20):
- return GitLabLister(
- api_baseurl=api_baseurl, instance=instance, sort=sort,
- per_page=per_page)
-
-
@app.task(name=__name__ + '.IncrementalGitLabLister')
def list_gitlab_incremental(**lister_args):
"""Incremental update of a GitLab instance"""
lister_args['sort'] = 'desc'
- lister = new_lister(**lister_args)
+ lister = GitLabLister(**lister_args)
total_pages = lister.get_pages_information()[1]
# stopping as soon as existing origins for that instance are detected
lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
@@ -33,14 +26,14 @@
@app.task(name=__name__ + '.RangeGitLabLister')
def _range_gitlab_lister(start, end, **lister_args):
- lister = new_lister(**lister_args)
+ lister = GitLabLister(**lister_args)
lister.run(min_bound=start, max_bound=end)
@app.task(name=__name__ + '.FullGitLabRelister', bind=True)
def list_gitlab_full(self, **lister_args):
"""Full update of a GitLab instance"""
- lister = new_lister(**lister_args)
+ lister = GitLabLister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
ranges = list(utils.split_range(total_pages, NBPAGES))
random.shuffle(ranges)
diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py
--- a/swh/lister/gitlab/tests/test_tasks.py
+++ b/swh/lister/gitlab/tests/test_tasks.py
@@ -26,9 +26,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='desc', per_page=20)
+ lister.assert_called_once_with(sort='desc')
lister.db_last_index.assert_not_called()
lister.get_pages_information.assert_called_once_with()
lister.run.assert_called_once_with(
@@ -48,9 +46,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='asc', per_page=20)
+ lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@@ -81,9 +77,7 @@
break
sleep(1)
- lister.assert_called_with(
- api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='asc', per_page=20)
+ lister.assert_called_with()
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks
@@ -113,7 +107,7 @@
res = swh_app.send_task(
'swh.lister.gitlab.tasks.FullGitLabRelister',
- kwargs=dict(api_baseurl='https://0xacab.org/api/v4'))
+ kwargs=dict(url='https://0xacab.org/api/v4'))
assert res
res.wait()
@@ -129,9 +123,7 @@
break
sleep(1)
- lister.assert_called_with(
- api_baseurl='https://0xacab.org/api/v4',
- instance=None, sort='asc', per_page=20)
+ lister.assert_called_with(url='https://0xacab.org/api/v4')
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks
diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py
--- a/swh/lister/npm/lister.py
+++ b/swh/lister/npm/lister.py
@@ -16,10 +16,9 @@
LISTER_NAME = 'npm'
instance = 'npm'
- def __init__(self, api_baseurl='https://replicate.npmjs.com',
+ def __init__(self, url='https://replicate.npmjs.com',
per_page=1000, override_config=None):
- super().__init__(api_baseurl=api_baseurl,
- override_config=override_config)
+ super().__init__(url=url, override_config=override_config)
self.per_page = per_page + 1
self.PATH_TEMPLATE += '&limit=%s' % self.per_page
@@ -76,7 +75,7 @@
'https://www.npmjs.com/package/%s' % repo_name,
# package metadata url needs to be escaped otherwise some requests
# may fail (for instance when a package name contains '/')
- '%s/%s' % (self.api_baseurl, quote(repo_name, safe=''))
+ '%s/%s' % (self.url, quote(repo_name, safe=''))
)
def string_pattern_check(self, inner, lower, upper=None):
diff --git a/swh/lister/npm/tasks.py b/swh/lister/npm/tasks.py
--- a/swh/lister/npm/tasks.py
+++ b/swh/lister/npm/tasks.py
@@ -14,7 +14,7 @@
@contextmanager
def save_registry_state(lister):
params = {'headers': lister.request_headers()}
- registry_state = lister.session.get(lister.api_baseurl, **params)
+ registry_state = lister.session.get(lister.url, **params)
registry_state = registry_state.json()
keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq',
'disk_size', 'data_size', 'committed_update_seq',
diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py
--- a/swh/lister/phabricator/lister.py
+++ b/swh/lister/phabricator/lister.py
@@ -20,11 +20,10 @@
MODEL = PhabricatorModel
LISTER_NAME = 'phabricator'
- def __init__(self, api_baseurl=None, instance=None, override_config=None):
- super().__init__(api_baseurl=api_baseurl,
- override_config=override_config)
+ def __init__(self, url=None, instance=None, override_config=None):
+ super().__init__(url=url, override_config=override_config)
if not instance:
- instance = urllib.parse.urlparse(self.api_baseurl).hostname
+ instance = urllib.parse.urlparse(self.url).hostname
self.instance = instance
@property
diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py
--- a/swh/lister/phabricator/tests/test_lister.py
+++ b/swh/lister/phabricator/tests/test_lister.py
@@ -33,9 +33,8 @@
]}}
override_config = dict(credentials=credentials,
**(override_config or {}))
- self.fl = self.Lister(
- api_baseurl='https://fakeurl', instance='fake',
- override_config=override_config)
+ self.fl = self.Lister(url='https://fakeurl', instance='fake',
+ override_config=override_config)
self.fl.INITIAL_BACKOFF = 1
self.fl.reset_backoff()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -36,31 +36,31 @@
db_url = init_db().url()
listers = {
- 'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'),
- 'phabricator': (
- 'api_baseurl',
- 'https://somewhere.org/api/diffusion.repository.search'),
+ 'gitlab': 'https://other.gitlab.uni/api/v4/',
+ 'phabricator': 'https://somewhere.org/api/diffusion.repository.search',
+ 'cgit': 'https://some.where/cgit',
}
# check the override ends up defined in the lister
- for lister_name, (url_key, url_value) in listers.items():
+ for lister_name, url in listers.items():
lst = get_lister(
lister_name, db_url, **{
- url_key: url_value,
+ 'url': url,
'priority': 'high',
'policy': 'oneshot',
})
- assert getattr(lst, url_key) == url_value
+ assert lst.url == url
assert lst.config['priority'] == 'high'
assert lst.config['policy'] == 'oneshot'
# check the default urls are used and not the override (since it's not
# passed)
- for lister_name, (url_key, url_value) in listers.items():
+ for lister_name, url in listers.items():
lst = get_lister(lister_name, db_url)
# no override so this does not end up in lister's configuration
- assert url_key not in lst.config
+ assert 'url' not in lst.config
assert 'priority' not in lst.config
assert 'oneshot' not in lst.config
+ assert lst.url == lst.DEFAULT_URL

File Metadata

Mime Type
text/plain
Expires
Jan 30 2025, 10:52 AM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222772

Event Timeline