Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163584
D1950.id6574.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D1950.id6574.diff
View Options
diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -23,9 +23,8 @@
instance = 'bitbucket'
default_min_bound = datetime.utcfromtimestamp(0)
- def __init__(self, api_baseurl=None, override_config=None, per_page=100):
- super().__init__(
- api_baseurl=api_baseurl, override_config=override_config)
+ def __init__(self, url=None, override_config=None, per_page=100):
+ super().__init__(url=url, override_config=override_config)
per_page = self.config.get('per_page', per_page)
self.PATH_TEMPLATE = '%s&pagelen=%s' % (
diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py
--- a/swh/lister/bitbucket/tasks.py
+++ b/swh/lister/bitbucket/tasks.py
@@ -12,20 +12,16 @@
GROUP_SPLIT = 10000
-def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100):
- return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page)
-
-
@app.task(name=__name__ + '.IncrementalBitBucketLister')
def list_bitbucket_incremental(**lister_args):
'''Incremental update of the BitBucket forge'''
- lister = new_lister(**lister_args)
+ lister = BitBucketLister(**lister_args)
lister.run(min_bound=lister.db_last_index(), max_bound=None)
@app.task(name=__name__ + '.RangeBitBucketLister')
def _range_bitbucket_lister(start, end, **lister_args):
- lister = new_lister(**lister_args)
+ lister = BitBucketLister(**lister_args)
lister.run(min_bound=start, max_bound=end)
@@ -36,7 +32,7 @@
It's not to be called for an initial listing.
"""
- lister = new_lister(**lister_args)
+ lister = BitBucketLister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
if not ranges:
self.log.info('Nothing to list')
diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py
--- a/swh/lister/bitbucket/tests/test_tasks.py
+++ b/swh/lister/bitbucket/tests/test_tasks.py
@@ -26,8 +26,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
+ lister.assert_called_once_with()
lister.db_last_index.assert_called_once_with()
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
@@ -45,8 +44,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
+ lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@@ -76,8 +74,7 @@
break
sleep(1)
- lister.assert_called_with(
- api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
+ lister.assert_called_with()
# one by the FullBitbucketRelister task
# + 5 for the RangeBitbucketLister subtasks
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -63,7 +63,7 @@
(lister_name, SUPPORTED_LISTERS))
if db_url:
conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
- # To allow api_baseurl override per lister
+
registry_entry = LISTERS[lister_name].load()()
lister_cls = registry_entry['lister']
lister = lister_cls(override_config=conf)
diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py
--- a/swh/lister/core/indexing_lister.py
+++ b/swh/lister/core/indexing_lister.py
@@ -244,6 +244,6 @@
class IndexingHttpLister(ListerHttpTransport, IndexingLister):
"""Convenience class for ensuring right lookup and init order
when combining IndexingLister and ListerHttpTransport."""
- def __init__(self, api_baseurl=None, override_config=None):
+ def __init__(self, url=None, override_config=None):
IndexingLister.__init__(self, override_config=override_config)
- ListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
+ ListerHttpTransport.__init__(self, url=url)
diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py
--- a/swh/lister/core/lister_transports.py
+++ b/swh/lister/core/lister_transports.py
@@ -96,7 +96,7 @@
required.
"""
path = self.PATH_TEMPLATE % identifier
- return self.api_baseurl + path
+ return self.url + path
def request_params(self, identifier):
"""Get the full parameters passed to requests given the
@@ -142,14 +142,14 @@
self.reset_backoff()
return False, 0
- def __init__(self, api_baseurl=None):
- if not api_baseurl:
- api_baseurl = self.config.get('api_baseurl')
- if not api_baseurl:
- api_baseurl = self.DEFAULT_URL
- if not api_baseurl:
- raise NameError('HTTP Lister Transport requires api_baseurl.')
- self.api_baseurl = api_baseurl # eg. 'https://api.github.com'
+ def __init__(self, url=None):
+ if not url:
+ url = self.config.get('url')
+ if not url:
+ url = self.DEFAULT_URL
+ if not url:
+ raise NameError('HTTP Lister Transport requires an url.')
+ self.url = url # eg. 'https://api.github.com'
self.session = requests.Session()
self.lister_version = __version__
@@ -218,7 +218,7 @@
"parse for information")
PATH_TEMPLATE = None # we do not use it
- def __init__(self, api_baseurl=None):
+ def __init__(self, url=None):
self.session = requests.Session()
self.lister_version = __version__
diff --git a/swh/lister/core/page_by_page_lister.py b/swh/lister/core/page_by_page_lister.py
--- a/swh/lister/core/page_by_page_lister.py
+++ b/swh/lister/core/page_by_page_lister.py
@@ -155,6 +155,6 @@
combining PageByPageLister and ListerHttpTransport.
"""
- def __init__(self, api_baseurl=None, override_config=None):
+ def __init__(self, url=None, override_config=None):
PageByPageLister.__init__(self, override_config=override_config)
- ListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
+ ListerHttpTransport.__init__(self, url=url)
diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py
--- a/swh/lister/core/tests/test_lister.py
+++ b/swh/lister/core/tests/test_lister.py
@@ -77,7 +77,7 @@
"""
if override_config or self.fl is None:
- self.fl = self.Lister(api_baseurl='https://fakeurl',
+ self.fl = self.Lister(url='https://fakeurl',
override_config=override_config)
self.fl.INITIAL_BACKOFF = 1
diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -35,7 +35,7 @@
instance = 'debian'
def __init__(self, override_config=None):
- ListerHttpTransport.__init__(self, api_baseurl="bogus")
+ ListerHttpTransport.__init__(self, url="notused")
ListerBase.__init__(self, override_config=override_config)
def transport_request(self, identifier):
diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py
--- a/swh/lister/github/tasks.py
+++ b/swh/lister/github/tasks.py
@@ -12,20 +12,16 @@
GROUP_SPLIT = 10000
-def new_lister(api_baseurl='https://api.github.com', **kw):
- return GitHubLister(api_baseurl=api_baseurl, **kw)
-
-
@app.task(name=__name__ + '.IncrementalGitHubLister')
def list_github_incremental(**lister_args):
'Incremental update of GitHub'
- lister = new_lister(**lister_args)
+ lister = GitHubLister(**lister_args)
lister.run(min_bound=lister.db_last_index(), max_bound=None)
@app.task(name=__name__ + '.RangeGitHubLister')
def _range_github_lister(start, end, **lister_args):
- lister = new_lister(**lister_args)
+ lister = GitHubLister(**lister_args)
lister.run(min_bound=start, max_bound=end)
@@ -36,7 +32,7 @@
It's not to be called for an initial listing.
"""
- lister = new_lister(**lister_args)
+ lister = GitHubLister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
if not ranges:
self.log.info('Nothing to list')
diff --git a/swh/lister/github/tests/test_tasks.py b/swh/lister/github/tests/test_tasks.py
--- a/swh/lister/github/tests/test_tasks.py
+++ b/swh/lister/github/tests/test_tasks.py
@@ -26,7 +26,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(api_baseurl='https://api.github.com')
+ lister.assert_called_once_with()
lister.db_last_index.assert_called_once_with()
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
@@ -44,7 +44,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(api_baseurl='https://api.github.com')
+ lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@@ -74,7 +74,7 @@
break
sleep(1)
- lister.assert_called_with(api_baseurl='https://api.github.com')
+ lister.assert_called_with()
# one by the FullGitHubRelister task
# + 5 for the RangeGitHubLister subtasks
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -16,12 +16,11 @@
MODEL = GitLabModel
LISTER_NAME = 'gitlab'
- def __init__(self, api_baseurl=None, instance=None,
+ def __init__(self, url=None, instance=None,
override_config=None, sort='asc', per_page=20):
- super().__init__(api_baseurl=api_baseurl,
- override_config=override_config)
+ super().__init__(url=url, override_config=override_config)
if instance is None:
- instance = parse_url(self.api_baseurl).host
+ instance = parse_url(self.url).host
self.instance = instance
self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
self.PATH_TEMPLATE, sort, per_page)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -14,18 +14,11 @@
NBPAGES = 10
-def new_lister(api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='asc', per_page=20):
- return GitLabLister(
- api_baseurl=api_baseurl, instance=instance, sort=sort,
- per_page=per_page)
-
-
@app.task(name=__name__ + '.IncrementalGitLabLister')
def list_gitlab_incremental(**lister_args):
"""Incremental update of a GitLab instance"""
lister_args['sort'] = 'desc'
- lister = new_lister(**lister_args)
+ lister = GitLabLister(**lister_args)
total_pages = lister.get_pages_information()[1]
# stopping as soon as existing origins for that instance are detected
lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
@@ -33,14 +26,14 @@
@app.task(name=__name__ + '.RangeGitLabLister')
def _range_gitlab_lister(start, end, **lister_args):
- lister = new_lister(**lister_args)
+ lister = GitLabLister(**lister_args)
lister.run(min_bound=start, max_bound=end)
@app.task(name=__name__ + '.FullGitLabRelister', bind=True)
def list_gitlab_full(self, **lister_args):
"""Full update of a GitLab instance"""
- lister = new_lister(**lister_args)
+ lister = GitLabLister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
ranges = list(utils.split_range(total_pages, NBPAGES))
random.shuffle(ranges)
diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py
--- a/swh/lister/gitlab/tests/test_tasks.py
+++ b/swh/lister/gitlab/tests/test_tasks.py
@@ -26,9 +26,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='desc', per_page=20)
+ lister.assert_called_once_with(sort='desc')
lister.db_last_index.assert_not_called()
lister.get_pages_information.assert_called_once_with()
lister.run.assert_called_once_with(
@@ -48,9 +46,7 @@
res.wait()
assert res.successful()
- lister.assert_called_once_with(
- api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='asc', per_page=20)
+ lister.assert_called_once_with()
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@@ -81,9 +77,7 @@
break
sleep(1)
- lister.assert_called_with(
- api_baseurl='https://gitlab.com/api/v4',
- instance=None, sort='asc', per_page=20)
+ lister.assert_called_with()
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks
@@ -113,7 +107,7 @@
res = swh_app.send_task(
'swh.lister.gitlab.tasks.FullGitLabRelister',
- kwargs=dict(api_baseurl='https://0xacab.org/api/v4'))
+ kwargs=dict(url='https://0xacab.org/api/v4'))
assert res
res.wait()
@@ -129,9 +123,7 @@
break
sleep(1)
- lister.assert_called_with(
- api_baseurl='https://0xacab.org/api/v4',
- instance=None, sort='asc', per_page=20)
+ lister.assert_called_with(url='https://0xacab.org/api/v4')
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks
diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py
--- a/swh/lister/npm/lister.py
+++ b/swh/lister/npm/lister.py
@@ -16,10 +16,9 @@
LISTER_NAME = 'npm'
instance = 'npm'
- def __init__(self, api_baseurl='https://replicate.npmjs.com',
+ def __init__(self, url='https://replicate.npmjs.com',
per_page=1000, override_config=None):
- super().__init__(api_baseurl=api_baseurl,
- override_config=override_config)
+ super().__init__(url=url, override_config=override_config)
self.per_page = per_page + 1
self.PATH_TEMPLATE += '&limit=%s' % self.per_page
@@ -76,7 +75,7 @@
'https://www.npmjs.com/package/%s' % repo_name,
# package metadata url needs to be escaped otherwise some requests
# may fail (for instance when a package name contains '/')
- '%s/%s' % (self.api_baseurl, quote(repo_name, safe=''))
+ '%s/%s' % (self.url, quote(repo_name, safe=''))
)
def string_pattern_check(self, inner, lower, upper=None):
diff --git a/swh/lister/npm/tasks.py b/swh/lister/npm/tasks.py
--- a/swh/lister/npm/tasks.py
+++ b/swh/lister/npm/tasks.py
@@ -14,7 +14,7 @@
@contextmanager
def save_registry_state(lister):
params = {'headers': lister.request_headers()}
- registry_state = lister.session.get(lister.api_baseurl, **params)
+ registry_state = lister.session.get(lister.url, **params)
registry_state = registry_state.json()
keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq',
'disk_size', 'data_size', 'committed_update_seq',
diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py
--- a/swh/lister/phabricator/lister.py
+++ b/swh/lister/phabricator/lister.py
@@ -20,11 +20,10 @@
MODEL = PhabricatorModel
LISTER_NAME = 'phabricator'
- def __init__(self, api_baseurl=None, instance=None, override_config=None):
- super().__init__(api_baseurl=api_baseurl,
- override_config=override_config)
+ def __init__(self, url=None, instance=None, override_config=None):
+ super().__init__(url=url, override_config=override_config)
if not instance:
- instance = urllib.parse.urlparse(self.api_baseurl).hostname
+ instance = urllib.parse.urlparse(self.url).hostname
self.instance = instance
@property
diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py
--- a/swh/lister/phabricator/tests/test_lister.py
+++ b/swh/lister/phabricator/tests/test_lister.py
@@ -33,9 +33,8 @@
]}}
override_config = dict(credentials=credentials,
**(override_config or {}))
- self.fl = self.Lister(
- api_baseurl='https://fakeurl', instance='fake',
- override_config=override_config)
+ self.fl = self.Lister(url='https://fakeurl', instance='fake',
+ override_config=override_config)
self.fl.INITIAL_BACKOFF = 1
self.fl.reset_backoff()
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
--- a/swh/lister/tests/test_cli.py
+++ b/swh/lister/tests/test_cli.py
@@ -36,31 +36,31 @@
db_url = init_db().url()
listers = {
- 'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'),
- 'phabricator': (
- 'api_baseurl',
- 'https://somewhere.org/api/diffusion.repository.search'),
+ 'gitlab': 'https://other.gitlab.uni/api/v4/',
+ 'phabricator': 'https://somewhere.org/api/diffusion.repository.search',
+ 'cgit': 'https://some.where/cgit',
}
# check the override ends up defined in the lister
- for lister_name, (url_key, url_value) in listers.items():
+ for lister_name, url in listers.items():
lst = get_lister(
lister_name, db_url, **{
- url_key: url_value,
+ 'url': url,
'priority': 'high',
'policy': 'oneshot',
})
- assert getattr(lst, url_key) == url_value
+ assert lst.url == url
assert lst.config['priority'] == 'high'
assert lst.config['policy'] == 'oneshot'
# check the default urls are used and not the override (since it's not
# passed)
- for lister_name, (url_key, url_value) in listers.items():
+ for lister_name, url in listers.items():
lst = get_lister(lister_name, db_url)
# no override so this does not end up in lister's configuration
- assert url_key not in lst.config
+ assert 'url' not in lst.config
assert 'priority' not in lst.config
assert 'oneshot' not in lst.config
+ assert lst.url == lst.DEFAULT_URL
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jan 30 2025, 10:52 AM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222772
Attached To
D1950: tasks: normalize the url argument name of most listers
Event Timeline
Log In to Comment