Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122792
D367.id1174.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D367.id1174.diff
View Options
diff --git a/swh/lister/core/paging_lister.py b/swh/lister/core/paging_lister.py
--- a/swh/lister/core/paging_lister.py
+++ b/swh/lister/core/paging_lister.py
@@ -79,7 +79,16 @@
# You probably don't need to override anything below this line.
+ def check_existence(self, injected_repos):
+ """Given a list of injected repos, check if we already have them.
+
+ """
+ # FIXME: Implement the check
+ return False
+
+<<<<<<< HEAD
def run(self, min_bound=None, max_bound=None):
+=======
"""Main entry function. Sequentially fetches repository data from the
service according to the basic outline in the class
docstring. Continually fetching sublists until either there
@@ -89,6 +98,9 @@
Args:
min_bound: optional page to start from
max_bound: optional page to stop at
+ check_existence (bool): optional existence check (for
+ incremental lister whose sort
+ order is inverted)
Returns:
nothing
@@ -99,6 +111,7 @@
self.min_page = min_bound
self.max_page = max_bound
+ already_seen = False
while self.is_within_bounds(page, self.min_page, self.max_page):
logging.info('listing repos starting at %s' % page)
@@ -106,12 +119,18 @@
response, injected_repos = self.ingest_data(page)
next_page = self.get_next_target_from_response(response)
+ if check_existence:
+ already_seen = self.check_existence(injected_repos)
+
# termination condition
if (next_page is None) or (next_page == page):
logging.info('stopping after page %s, no next link found' %
page)
break
+ elif already_seen:
+ logging.info('Repositories already seen, stopping')
+ break
else:
page = next_page
diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -12,7 +12,7 @@
class GitLabLister(PageByPageHttpLister):
# Template path expecting an integer that represents the page id
- PATH_TEMPLATE = '/projects?page=%d&order_by=id&sort=asc&simple=true'
+ PATH_TEMPLATE = '/projects?page=%d&order_by=id'
API_URL_INDEX_RE = re.compile(r'^.*/projects.*page=(\d+).*')
MODEL = GitLabModel
LISTER_NAME = 'gitlab'
@@ -103,7 +103,7 @@
return None
def get_pages_information(self):
- """Determine some pages information.
+ """Determine pages information.
"""
response = self.transport_head(identifier=1)
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -17,13 +17,16 @@
class RangeGitLabLister(GitLabListerTask, RangeListerTask):
- """GitLab lister working on specified range (start, end) arguments.
+ """Range GitLab lister (list available origins in the specified range).
"""
task_queue = 'swh_lister_gitlab_refresh'
class FullGitLabRelister(GitLabListerTask):
+ """Full GitLab lister (list all available origins from the API).
+
+ """
task_queue = 'swh_lister_gitlab_refresh'
def run_task(self, *args, **kwargs):
@@ -41,3 +44,22 @@
range_task = RangeGitLabLister()
group(range_task.s(minv, maxv, *args, **kwargs)
for minv, maxv in ranges)()
+
+
+class IncrementalGitLabLister(ListerTaskBase):
+ """Incremental GitLab lister (list only new available origins).
+
+ """
+ task_queue = 'swh_lister_gitlab_discover'
+
+ def new_lister(self, api_baseurl='https://gitlab.com/api/v4',
+ instance='gitlab.com',):
+ # sort='desc' inverts the order of the lister's results
+ return GitLabLister(instance=instance, api_baseurl=api_baseurl,
+ sort='desc')
+
+ def run_task(self, *args, **kwargs):
+ lister = self.new_lister(*args, **kwargs)
+ # will check for existing data and exit when found
+ return lister.run(min_bound=None, max_bound=None,
+ check_for_presence=True)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 17 2024, 12:49 AM (4 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230594
Attached To
D367: swh.lister.core: Make gitlab lister a paging lister instance
Event Timeline
Log In to Comment