Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066475
D1632.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
D1632.id.diff
View Options
diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -2,10 +2,11 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import datetime
import logging
import iso8601
+from datetime import datetime
+
from urllib import parse
from swh.lister.bitbucket.models import BitBucketModel
@@ -23,6 +24,7 @@
MODEL = BitBucketModel
LISTER_NAME = 'bitbucket'
instance = 'bitbucket'
+ default_min_bound = datetime.utcfromtimestamp(0).isoformat()
def __init__(self, api_baseurl, override_config=None, per_page=100):
super().__init__(
@@ -54,21 +56,6 @@
repos = response.json()['values']
return [self.get_model_from_repo(repo) for repo in repos]
- def db_first_index(self):
- """For the first time listing, there is no data in db, so fallback to the
- bitbucket starting year.
-
- """
- return super().db_first_index() or '2008-01-01T00:00:00Z'
-
- def db_last_index(self):
- """For the first time listing, there is no data in db, so fallback to the time
- of the first run as max date.
-
- """
- return super().db_last_index() or datetime.datetime.now(
- tz=datetime.timezone.utc).isoformat()
-
def request_uri(self, identifier):
return super().request_uri(identifier or '1970-01-01')
diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py
--- a/swh/lister/bitbucket/tasks.py
+++ b/swh/lister/bitbucket/tasks.py
@@ -30,8 +30,17 @@
@app.task(name=__name__ + '.FullBitBucketRelister', bind=True)
def full_bitbucket_relister(self, split=None, **lister_args):
+ """Relist from the beginning of what's already been listed.
+
+ It's not to be called for an initial listing.
+
+ """
lister = new_lister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
+ if not ranges:
+ self.log.info('Nothing to list')
+ return
+
random.shuffle(ranges)
promise = group(range_bitbucket_lister.s(minv, maxv, **lister_args)
for minv, maxv in ranges)()
diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py
--- a/swh/lister/core/indexing_lister.py
+++ b/swh/lister/core/indexing_lister.py
@@ -17,6 +17,7 @@
class IndexingLister(ListerBase):
flush_packet_db = 20
+ default_min_bound = ''
"""Lister* intermediate class for any service that follows the pattern:
- The service must report at least one stable unique identifier, known
@@ -95,17 +96,18 @@
def db_partition_indices(self, partition_size):
"""Describe an index-space compartmentalization of the db table
- in equal sized chunks. This is used to describe min&max bounds for
- parallelizing fetch tasks.
+ in equal sized chunks. This is used to describe min&max bounds for
+ parallelizing fetch tasks.
Args:
partition_size (int): desired size to make each partition
+
Returns:
a list of tuples (begin, end) of indexable value that
- declare approximately equal-sized ranges of existing
- repos
- """
+ declare approximately equal-sized ranges of existing
+ repos
+ """
n = max(self.db_num_entries(), 10)
partition_size = min(partition_size, n)
n_partitions = n // partition_size
@@ -114,7 +116,8 @@
max_index = self.db_last_index()
if not min_index or not max_index:
- raise ValueError("Can't partition an empty range")
+ # Nothing to list
+ return []
if isinstance(min_index, str):
def format_bound(bound):
@@ -201,7 +204,7 @@
self.max_index = max_bound
def ingest_indexes():
- index = min_bound or ''
+ index = min_bound or self.default_min_bound
for i in count(1):
response, injected_repos = self.ingest_data(index)
if not response and not injected_repos:
diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py
--- a/swh/lister/github/tasks.py
+++ b/swh/lister/github/tasks.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 the Software Heritage developers
+# Copyright (C) 2017-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -30,8 +30,16 @@
@app.task(name=__name__ + '.FullGitHubRelister', bind=True)
def full_github_relister(self, split=None, **lister_args):
+ """Relist from the beginning of what's already been listed.
+
+ It's not to be called for an initial listing.
+
+ """
lister = new_lister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
+ if not ranges:
+ self.log.info('Nothing to list')
+ return
random.shuffle(ranges)
promise = group(range_github_lister.s(minv, maxv, **lister_args)
for minv, maxv in ranges)()
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -38,6 +38,11 @@
@app.task(name=__name__ + '.FullGitLabRelister', bind=True)
def full_gitlab_relister(self, **lister_args):
+ """Full lister
+
+ Misnamed as a relister: it should be renamed to reflect a full listing.
+
+ """
lister = new_lister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
ranges = list(utils.split_range(total_pages, NBPAGES))
diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py
--- a/swh/lister/phabricator/lister.py
+++ b/swh/lister/phabricator/lister.py
@@ -31,6 +31,14 @@
super().__init__(api_baseurl=api_baseurl,
override_config=override_config)
+ @property
+ def default_min_bound(self):
+ """Starting boundary used when `min_bound` is not defined (db empty).
+ It is used within the :meth:`run` call.
+
+ """
+ return self._bootstrap_repositories_listing()
+
def _build_query_params(self, params, api_token):
"""Build query params to include the forge's api token
@@ -134,21 +142,6 @@
self.schedule_missing_tasks(models_list, injected)
return self.max_index
- def run(self, min_bound=None, max_bound=None):
- """
- (Override) Run the lister on the specified Phabricator instance
-
- Args:
- min_bound (int): Optional repository index to start the listing
- after it
- max_bound (int): Optional repository index to stop the listing
- after it
- """
- # initial call to the lister, we need to bootstrap it in that case
- if min_bound is None:
- min_bound = self._bootstrap_repositories_listing()
- super().run(min_bound, max_bound)
-
def get_repo_url(attachments):
"""
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 11:44 AM (12 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227912
Attached To
D1632: bitbucket: Allow incremental lister to start properly the first time
Event Timeline
Log In to Comment