Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/algos/revisions_walker.py
Show First 20 Lines • Show All 135 Lines • ▼ Show 20 Lines | def is_finished(self): | ||||
""" | """ | ||||
if self._max_revs is not None and self._num_revs >= self._max_revs: | if self._max_revs is not None and self._num_revs >= self._max_revs: | ||||
return True | return True | ||||
if not self._revs_to_visit: | if not self._revs_to_visit: | ||||
return True | return True | ||||
return False | return False | ||||
def _get_rev(self, rev_id): | def _get_rev(self, rev_id): | ||||
rev = self._revs.get(rev_id, None) | rev = self._revs.get(rev_id) | ||||
if not rev: | if rev is None: | ||||
# cache some revisions in advance to avoid sending too much | # cache some revisions in advance to avoid sending too much | ||||
# requests to storage and thus speedup the revisions walk | # requests to storage and thus speedup the revisions walk | ||||
for rev in self.storage.revision_log([rev_id], limit=100): | for rev in self.storage.revision_log([rev_id], limit=100): | ||||
olasd: maybe add a comment here saying that the history got truncated? | |||||
anlambert (marked Done): Sure, will improve that. | |||||
# revision data is missing, returned history will be truncated | |||||
if rev is None: | |||||
continue | |||||
self._revs[rev['id']] = rev | self._revs[rev['id']] = rev | ||||
return self._revs[rev_id] | return self._revs.get(rev_id) | ||||
def export_state(self): | def export_state(self): | ||||
""" | """ | ||||
Export the internal state of that revision walker to a dict. | Export the internal state of that revision walker to a dict. | ||||
Its purpose is to continue the iteration in a pagination context. | Its purpose is to continue the iteration in a pagination context. | ||||
Returns: | Returns: | ||||
dict: A dict containing the internal state of that revisions walker | dict: A dict containing the internal state of that revisions walker | ||||
""" | """ | ||||
return { | return { | ||||
'revs_to_visit': self._revs_to_visit, | 'revs_to_visit': self._revs_to_visit, | ||||
'done': self._done, | 'done': self._done, | ||||
'last_rev': self._last_rev, | 'last_rev': self._last_rev, | ||||
'num_revs': self._num_revs | 'num_revs': self._num_revs | ||||
} | } | ||||
def __next__(self): | def __next__(self): | ||||
if self.is_finished(): | if self.is_finished(): | ||||
raise StopIteration | raise StopIteration | ||||
while self._revs_to_visit: | while self._revs_to_visit: | ||||
rev_id = self.get_next_rev_id() | rev_id = self.get_next_rev_id() | ||||
if rev_id in self._done: | if rev_id in self._done: | ||||
continue | continue | ||||
self._done.add(rev_id) | self._done.add(rev_id) | ||||
rev = self._get_rev(rev_id) | rev = self._get_rev(rev_id) | ||||
olasd (Not Done): Same here? Maybe add a comment saying that the history got truncated. | |||||
# revision data is missing, returned history will be truncated | |||||
if rev is None: | |||||
continue | |||||
self.process_parent_revs(rev) | self.process_parent_revs(rev) | ||||
if self.should_return(rev): | if self.should_return(rev): | ||||
self._num_revs += 1 | self._num_revs += 1 | ||||
self._last_rev = rev | self._last_rev = rev | ||||
return rev | return rev | ||||
raise StopIteration | raise StopIteration | ||||
def __iter__(self): | def __iter__(self): | ||||
Show All 12 Lines | def process_rev(self, rev_id): | ||||
""" | """ | ||||
Add the revision to a priority queue according to the committer date. | Add the revision to a priority queue according to the committer date. | ||||
Args: | Args: | ||||
rev_id (bytes): the newly visited revision identifier | rev_id (bytes): the newly visited revision identifier | ||||
""" | """ | ||||
if rev_id not in self._done: | if rev_id not in self._done: | ||||
rev = self._get_rev(rev_id) | rev = self._get_rev(rev_id) | ||||
if rev is not None: | |||||
commit_time = rev['committer_date']['timestamp']['seconds'] | commit_time = rev['committer_date']['timestamp']['seconds'] | ||||
heapq.heappush(self._revs_to_visit, (-commit_time, rev_id)) | heapq.heappush(self._revs_to_visit, (-commit_time, rev_id)) | ||||
def get_next_rev_id(self): | def get_next_rev_id(self): | ||||
""" | """ | ||||
Return the smallest revision from the priority queue, i.e. | Return the smallest revision from the priority queue, i.e. | ||||
the one with highest committer date. | the one with highest committer date. | ||||
Returns: | Returns: | ||||
dict: A dict describing a revision as returned by | dict: A dict describing a revision as returned by | ||||
▲ Show 20 Lines • Show All 300 Lines • Show Last 20 Lines |
olasd: Maybe add a comment here saying that the history got truncated?