Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/algos/revisions_walker.py
Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | The iteration step performs the following operations: | ||||
method :meth:`should_return` and returns it if | method :meth:`should_return` and returns it if | ||||
it is the case | it is the case | ||||
In order to easily instantiate a specific type of revisions | In order to easily instantiate a specific type of revisions | ||||
walker, it is recommended to use the factory function | walker, it is recommended to use the factory function | ||||
:func:`get_revisions_walker`. | :func:`get_revisions_walker`. | ||||
Args: | Args: | ||||
storage (swh.storage.storage.Storage): instance of swh storage | storage (swh.storage.interface.StorageInterface): instance of swh storage | ||||
(either local or remote) | (either local or remote) | ||||
rev_start (bytes): a revision identifier | rev_start (bytes): a revision identifier | ||||
max_revs (Optional[int]): maximum number of revisions to return | max_revs (Optional[int]): maximum number of revisions to return | ||||
state (Optional[dict]): previous state of that revisions walker | state (Optional[dict]): previous state of that revisions walker | ||||
""" | """ | ||||
def __init__(self, storage, rev_start, max_revs=None, state=None): | def __init__(self, storage, rev_start, max_revs=None, state=None): | ||||
self._revs_to_visit = [] | self._revs_to_visit = [] | ||||
Show All 31 Lines | def get_next_rev_id(self): | ||||
""" | """ | ||||
Abstract method whose purpose is to return the next revision | Abstract method whose purpose is to return the next revision | ||||
during the iteration. | during the iteration. | ||||
Derived classes must implement it according to the desired | Derived classes must implement it according to the desired | ||||
method to walk across the revisions history. | method to walk across the revisions history. | ||||
Returns: | Returns: | ||||
dict: A dict describing a revision as returned by | dict: A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
pass | pass | ||||
def process_parent_revs(self, rev): | def process_parent_revs(self, rev): | ||||
""" | """ | ||||
Process the parents of a revision when it is iterated. | Process the parents of a revision when it is iterated. | ||||
The default implementation simply calls :meth:`process_rev` | The default implementation simply calls :meth:`process_rev` | ||||
for each parent revision in the order they are declared. | for each parent revision in the order they are declared. | ||||
Args: | Args: | ||||
rev (dict): A dict describing a revision as returned by | rev (dict): A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
for parent_id in rev["parents"]: | for parent_id in rev["parents"]: | ||||
self.process_rev(parent_id) | self.process_rev(parent_id) | ||||
def should_return(self, rev): | def should_return(self, rev): | ||||
""" | """ | ||||
Filter out a revision to return if needed. | Filter out a revision to return if needed. | ||||
Default implementation returns all iterated revisions. | Default implementation returns all iterated revisions. | ||||
Args: | Args: | ||||
rev (dict): A dict describing a revision as returned by | rev (dict): A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
Returns: | Returns: | ||||
bool: Whether to return the revision in the iteration | bool: Whether to return the revision in the iteration | ||||
""" | """ | ||||
return True | return True | ||||
def is_finished(self): | def is_finished(self): | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | class CommitterDateRevisionsWalker(RevisionsWalker): | ||||
def get_next_rev_id(self): | def get_next_rev_id(self): | ||||
""" | """ | ||||
Return the smallest revision from the priority queue, i.e. | Return the smallest revision from the priority queue, i.e. | ||||
the one with highest committer date. | the one with highest committer date. | ||||
Returns: | Returns: | ||||
dict: A dict describing a revision as returned by | dict: A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
_, rev_id = heapq.heappop(self._revs_to_visit) | _, rev_id = heapq.heappop(self._revs_to_visit) | ||||
return rev_id | return rev_id | ||||
class BFSRevisionsWalker(RevisionsWalker): | class BFSRevisionsWalker(RevisionsWalker): | ||||
""" | """ | ||||
Revisions walker that returns revisions in the same order | Revisions walker that returns revisions in the same order | ||||
Show All 18 Lines | def process_rev(self, rev_id): | ||||
self._revs_to_visit.append(rev_id) | self._revs_to_visit.append(rev_id) | ||||
def get_next_rev_id(self): | def get_next_rev_id(self): | ||||
""" | """ | ||||
Return the next revision from the queue. | Return the next revision from the queue. | ||||
Returns: | Returns: | ||||
dict: A dict describing a revision as returned by | dict: A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
return self._revs_to_visit.popleft() | return self._revs_to_visit.popleft() | ||||
class DFSPostRevisionsWalker(RevisionsWalker): | class DFSPostRevisionsWalker(RevisionsWalker): | ||||
""" | """ | ||||
Revisions walker that returns revisions in the same order | Revisions walker that returns revisions in the same order | ||||
as when performing a depth-first search in post-order on the | as when performing a depth-first search in post-order on the | ||||
Show All 15 Lines | def process_rev(self, rev_id): | ||||
self._revs_to_visit.append(rev_id) | self._revs_to_visit.append(rev_id) | ||||
def get_next_rev_id(self): | def get_next_rev_id(self): | ||||
""" | """ | ||||
Return the next revision from the stack. | Return the next revision from the stack. | ||||
Returns: | Returns: | ||||
dict: A dict describing a revision as returned by | dict: A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
return self._revs_to_visit.pop() | return self._revs_to_visit.pop() | ||||
class DFSRevisionsWalker(DFSPostRevisionsWalker): | class DFSRevisionsWalker(DFSPostRevisionsWalker): | ||||
""" | """ | ||||
Revisions walker that returns revisions in the same order | Revisions walker that returns revisions in the same order | ||||
as when performing a depth-first search in pre-order on the | as when performing a depth-first search in pre-order on the | ||||
revisions DAG (i.e. after visiting a merge commit, | revisions DAG (i.e. after visiting a merge commit, | ||||
the base commit it was merged on will be visited before | the base commit it was merged on will be visited before | ||||
the merged commit). | the merged commit). | ||||
""" | """ | ||||
rw_type = "dfs" | rw_type = "dfs" | ||||
def process_parent_revs(self, rev): | def process_parent_revs(self, rev): | ||||
""" | """ | ||||
Process the parents of a revision when it is iterated in | Process the parents of a revision when it is iterated in | ||||
the reversed order they are declared. | the reversed order they are declared. | ||||
Args: | Args: | ||||
rev (dict): A dict describing a revision as returned by | rev (dict): A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
for parent_id in reversed(rev["parents"]): | for parent_id in reversed(rev["parents"]): | ||||
self.process_rev(parent_id) | self.process_rev(parent_id) | ||||
class PathRevisionsWalker(CommitterDateRevisionsWalker): | class PathRevisionsWalker(CommitterDateRevisionsWalker): | ||||
""" | """ | ||||
Revisions walker that returns revisions where a specific | Revisions walker that returns revisions where a specific | ||||
Show All 10 Lines | class PathRevisionsWalker(CommitterDateRevisionsWalker): | ||||
will stop once a revision where the path has been added is found. | will stop once a revision where the path has been added is found. | ||||
.. warning:: Due to client-side implementation, performances | .. warning:: Due to client-side implementation, performances | ||||
are not optimal when the total numbers of revisions to walk | are not optimal when the total numbers of revisions to walk | ||||
is large. This should only be used when the total number of | is large. This should only be used when the total number of | ||||
revisions does not exceed a couple of thousands. | revisions does not exceed a couple of thousands. | ||||
Args: | Args: | ||||
storage (swh.storage.storage.Storage): instance of swh storage | storage (swh.storage.interface.StorageInterface): instance of swh storage | ||||
(either local or remote) | (either local or remote) | ||||
rev_start (bytes): a revision identifier | rev_start (bytes): a revision identifier | ||||
path (str): the path in the source tree to retrieve the history | path (str): the path in the source tree to retrieve the history | ||||
max_revs (Optional[int]): maximum number of revisions to return | max_revs (Optional[int]): maximum number of revisions to return | ||||
state (Optional[dict]): previous state of that revisions walker | state (Optional[dict]): previous state of that revisions walker | ||||
""" | """ | ||||
rw_type = "path" | rw_type = "path" | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | def process_parent_revs(self, rev): | ||||
manner as ``git log``. When a revision has multiple parents, | manner as ``git log``. When a revision has multiple parents, | ||||
the following process is applied. If the revision was a merge, | the following process is applied. If the revision was a merge, | ||||
and has the same path identifier to one parent, follow only that | and has the same path identifier to one parent, follow only that | ||||
parent (even if there are several parents with the same path | parent (even if there are several parents with the same path | ||||
identifier, follow only one of them.) Otherwise, follow all parents. | identifier, follow only one of them.) Otherwise, follow all parents. | ||||
Args: | Args: | ||||
rev (dict): A dict describing a revision as returned by | rev (dict): A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
""" | """ | ||||
rev_path_id = self._get_path_id(rev["id"]) | rev_path_id = self._get_path_id(rev["id"]) | ||||
if rev_path_id: | if rev_path_id: | ||||
if len(rev["parents"]) == 1: | if len(rev["parents"]) == 1: | ||||
self.process_rev(rev["parents"][0]) | self.process_rev(rev["parents"][0]) | ||||
else: | else: | ||||
parent_rev_path_ids = [ | parent_rev_path_ids = [ | ||||
Show All 15 Lines | def should_return(self, rev): | ||||
Check if a revision should be returned when iterating. | Check if a revision should be returned when iterating. | ||||
It verifies that the specified path has been modified | It verifies that the specified path has been modified | ||||
by the revision but also that all parents have a path | by the revision but also that all parents have a path | ||||
identifier different from the revision one in order | identifier different from the revision one in order | ||||
to get a simplified history. | to get a simplified history. | ||||
Args: | Args: | ||||
rev (dict): A dict describing a revision as returned by | rev (dict): A dict describing a revision as returned by | ||||
:meth:`swh.storage.storage.Storage.revision_get` | :meth:`swh.storage.interface.StorageInterface.revision_get` | ||||
Returns: | Returns: | ||||
bool: Whether to return the revision in the iteration | bool: Whether to return the revision in the iteration | ||||
""" | """ | ||||
rev_path_id = self._get_path_id(rev["id"]) | rev_path_id = self._get_path_id(rev["id"]) | ||||
if not rev["parents"]: | if not rev["parents"]: | ||||
return rev_path_id is not None | return rev_path_id is not None | ||||
▲ Show 20 Lines • Show All 64 Lines • Show Last 20 Lines |